Module Name: src
Committed By: ad
Date: Tue Dec 31 22:42:51 UTC 2019
Modified Files:
src/sys/kern: kern_idle.c
src/sys/miscfs/genfs: genfs_io.c
src/sys/ufs/lfs: lfs_pages.c lfs_vfsops.c ulfs_inode.c
src/sys/ufs/ufs: ufs_inode.c
src/sys/uvm: uvm.h uvm_anon.c uvm_aobj.c uvm_bio.c uvm_extern.h
uvm_fault.c uvm_glue.c uvm_loan.c uvm_map.c uvm_object.c uvm_page.c
uvm_page.h uvm_pager.c uvm_pdaemon.c uvm_pdpolicy.h
uvm_pdpolicy_clock.c uvm_pdpolicy_clockpro.c
Log Message:
- Add and use wrapper functions that acquire and release page interlocks, and pairs
of page interlocks. Require that the page interlock be held over calls to
uvm_pageactivate(), uvm_pagewire() and similar.
- Solve the concurrency problem with page replacement state. Rather than
updating the global state synchronously, set an intended state on
individual pages (active, inactive, enqueued, dequeued) while holding the
page interlock. After the interlock is released put the pages on a 128
entry per-CPU queue for their state changes to be made real in batch.
This results in a ~400-fold decrease in contention on my test system.
Proposed on tech-kern but modified to use the page interlock rather than
atomics to synchronise as it's much easier to maintain that way, and
cheaper.
To generate a diff of this commit:
cvs rdiff -u -r1.28 -r1.29 src/sys/kern/kern_idle.c
cvs rdiff -u -r1.82 -r1.83 src/sys/miscfs/genfs/genfs_io.c
cvs rdiff -u -r1.18 -r1.19 src/sys/ufs/lfs/lfs_pages.c
cvs rdiff -u -r1.366 -r1.367 src/sys/ufs/lfs/lfs_vfsops.c
cvs rdiff -u -r1.22 -r1.23 src/sys/ufs/lfs/ulfs_inode.c
cvs rdiff -u -r1.106 -r1.107 src/sys/ufs/ufs/ufs_inode.c
cvs rdiff -u -r1.72 -r1.73 src/sys/uvm/uvm.h
cvs rdiff -u -r1.69 -r1.70 src/sys/uvm/uvm_anon.c
cvs rdiff -u -r1.132 -r1.133 src/sys/uvm/uvm_aobj.c
cvs rdiff -u -r1.101 -r1.102 src/sys/uvm/uvm_bio.c
cvs rdiff -u -r1.217 -r1.218 src/sys/uvm/uvm_extern.h
cvs rdiff -u -r1.213 -r1.214 src/sys/uvm/uvm_fault.c
cvs rdiff -u -r1.174 -r1.175 src/sys/uvm/uvm_glue.c
cvs rdiff -u -r1.92 -r1.93 src/sys/uvm/uvm_loan.c src/sys/uvm/uvm_page.h
cvs rdiff -u -r1.368 -r1.369 src/sys/uvm/uvm_map.c
cvs rdiff -u -r1.18 -r1.19 src/sys/uvm/uvm_object.c
cvs rdiff -u -r1.219 -r1.220 src/sys/uvm/uvm_page.c
cvs rdiff -u -r1.118 -r1.119 src/sys/uvm/uvm_pager.c
cvs rdiff -u -r1.121 -r1.122 src/sys/uvm/uvm_pdaemon.c
cvs rdiff -u -r1.5 -r1.6 src/sys/uvm/uvm_pdpolicy.h
cvs rdiff -u -r1.27 -r1.28 src/sys/uvm/uvm_pdpolicy_clock.c
cvs rdiff -u -r1.21 -r1.22 src/sys/uvm/uvm_pdpolicy_clockpro.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/kern/kern_idle.c
diff -u src/sys/kern/kern_idle.c:1.28 src/sys/kern/kern_idle.c:1.29
--- src/sys/kern/kern_idle.c:1.28 Fri Dec 6 21:36:10 2019
+++ src/sys/kern/kern_idle.c Tue Dec 31 22:42:51 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: kern_idle.c,v 1.28 2019/12/06 21:36:10 ad Exp $ */
+/* $NetBSD: kern_idle.c,v 1.29 2019/12/31 22:42:51 ad Exp $ */
/*-
* Copyright (c)2002, 2006, 2007 YAMAMOTO Takashi,
@@ -28,7 +28,7 @@
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_idle.c,v 1.28 2019/12/06 21:36:10 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_idle.c,v 1.29 2019/12/31 22:42:51 ad Exp $");
#include <sys/param.h>
#include <sys/cpu.h>
@@ -39,7 +39,7 @@ __KERNEL_RCSID(0, "$NetBSD: kern_idle.c,
#include <sys/proc.h>
#include <sys/atomic.h>
-#include <uvm/uvm.h> /* uvm_pageidlezero */
+#include <uvm/uvm.h> /* uvm_idle */
#include <uvm/uvm_extern.h>
void
@@ -81,7 +81,7 @@ idle_loop(void *dummy)
sched_idle();
if (!sched_curcpu_runnable_p()) {
if ((spc->spc_flags & SPCF_OFFLINE) == 0) {
- uvm_pageidlezero();
+ uvm_idle();
}
if (!sched_curcpu_runnable_p()) {
cpu_idle();
Index: src/sys/miscfs/genfs/genfs_io.c
diff -u src/sys/miscfs/genfs/genfs_io.c:1.82 src/sys/miscfs/genfs/genfs_io.c:1.83
--- src/sys/miscfs/genfs/genfs_io.c:1.82 Tue Dec 31 12:40:27 2019
+++ src/sys/miscfs/genfs/genfs_io.c Tue Dec 31 22:42:50 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: genfs_io.c,v 1.82 2019/12/31 12:40:27 ad Exp $ */
+/* $NetBSD: genfs_io.c,v 1.83 2019/12/31 22:42:50 ad Exp $ */
/*
* Copyright (c) 1982, 1986, 1989, 1993
@@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: genfs_io.c,v 1.82 2019/12/31 12:40:27 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: genfs_io.c,v 1.83 2019/12/31 22:42:50 ad Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -491,7 +491,9 @@ out:
uvm_pagefree(pg);
continue;
}
+ uvm_pagelock(pg);
uvm_pageenqueue(pg);
+ uvm_pageunlock(pg);
pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
UVM_PAGE_OWN(pg, NULL);
}
@@ -1164,14 +1166,18 @@ retry:
if (tpg->offset < startoff || tpg->offset >= endoff)
continue;
if (flags & PGO_DEACTIVATE && tpg->wire_count == 0) {
+ uvm_pagelock(tpg);
uvm_pagedeactivate(tpg);
+ uvm_pageunlock(tpg);
} else if (flags & PGO_FREE) {
pmap_page_protect(tpg, VM_PROT_NONE);
if (tpg->flags & PG_BUSY) {
tpg->flags |= freeflag;
if (pagedaemon) {
uvm_pageout_start(1);
+ uvm_pagelock(tpg);
uvm_pagedequeue(tpg);
+ uvm_pageunlock(tpg);
}
} else {
@@ -1603,7 +1609,9 @@ genfs_compat_getpages(void *v)
pg->flags |= PG_RELEASED;
} else {
pmap_clear_modify(pg);
+ uvm_pagelock(pg);
uvm_pageactivate(pg);
+ uvm_pageunlock(pg);
}
}
if (error) {
Index: src/sys/ufs/lfs/lfs_pages.c
diff -u src/sys/ufs/lfs/lfs_pages.c:1.18 src/sys/ufs/lfs/lfs_pages.c:1.19
--- src/sys/ufs/lfs/lfs_pages.c:1.18 Fri Dec 20 20:54:48 2019
+++ src/sys/ufs/lfs/lfs_pages.c Tue Dec 31 22:42:51 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: lfs_pages.c,v 1.18 2019/12/20 20:54:48 ad Exp $ */
+/* $NetBSD: lfs_pages.c,v 1.19 2019/12/31 22:42:51 ad Exp $ */
/*-
* Copyright (c) 1999, 2000, 2001, 2002, 2003, 2019 The NetBSD Foundation, Inc.
@@ -60,7 +60,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: lfs_pages.c,v 1.18 2019/12/20 20:54:48 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: lfs_pages.c,v 1.19 2019/12/31 22:42:51 ad Exp $");
#ifdef _KERNEL_OPT
#include "opt_compat_netbsd.h"
@@ -338,7 +338,9 @@ check_dirty(struct lfs *fs, struct vnode
* Wire the page so that
* pdaemon doesn't see it again.
*/
+ uvm_pagelock(pg);
uvm_pagewire(pg);
+ uvm_pageunlock(pg);
/* Suspended write flag */
pg->flags |= PG_DELWRI;
@@ -495,7 +497,9 @@ retry:
"lfsput2", 0);
mutex_enter(vp->v_interlock);
}
+ uvm_pagelock(pg);
uvm_pageactivate(pg);
+ uvm_pageunlock(pg);
}
ap->a_offlo = blkeof;
if (ap->a_offhi > 0 && ap->a_offhi <= ap->a_offlo) {
Index: src/sys/ufs/lfs/lfs_vfsops.c
diff -u src/sys/ufs/lfs/lfs_vfsops.c:1.366 src/sys/ufs/lfs/lfs_vfsops.c:1.367
--- src/sys/ufs/lfs/lfs_vfsops.c:1.366 Fri Dec 13 20:10:22 2019
+++ src/sys/ufs/lfs/lfs_vfsops.c Tue Dec 31 22:42:51 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: lfs_vfsops.c,v 1.366 2019/12/13 20:10:22 ad Exp $ */
+/* $NetBSD: lfs_vfsops.c,v 1.367 2019/12/31 22:42:51 ad Exp $ */
/*-
* Copyright (c) 1999, 2000, 2001, 2002, 2003, 2007, 2007
@@ -61,7 +61,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.366 2019/12/13 20:10:22 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: lfs_vfsops.c,v 1.367 2019/12/31 22:42:51 ad Exp $");
#if defined(_KERNEL_OPT)
#include "opt_lfs.h"
@@ -2054,7 +2054,9 @@ lfs_gop_write(struct vnode *vp, struct v
pgs[i]->flags |= PG_PAGEOUT;
uvm_pageout_start(1);
mutex_enter(vp->v_interlock);
+ uvm_pagelock(pgs[i]);
uvm_pageunwire(pgs[i]);
+ uvm_pageunlock(pgs[i]);
mutex_exit(vp->v_interlock);
}
}
@@ -2241,10 +2243,12 @@ lfs_gop_write(struct vnode *vp, struct v
if (pg->flags & PG_PAGEOUT)
uvm_pageout_done(1);
+ uvm_pagelock(pg);
if (pg->flags & PG_DELWRI) {
uvm_pageunwire(pg);
}
uvm_pageactivate(pg);
+ uvm_pageunlock(pg);
pg->flags &= ~(PG_CLEAN|PG_DELWRI|PG_PAGEOUT|PG_RELEASED);
DLOG((DLOG_PAGE, "pg[%d] = %p (vp %p off %" PRIx64 ")\n", i, pg,
vp, pg->offset));
Index: src/sys/ufs/lfs/ulfs_inode.c
diff -u src/sys/ufs/lfs/ulfs_inode.c:1.22 src/sys/ufs/lfs/ulfs_inode.c:1.23
--- src/sys/ufs/lfs/ulfs_inode.c:1.22 Fri Dec 13 20:10:22 2019
+++ src/sys/ufs/lfs/ulfs_inode.c Tue Dec 31 22:42:51 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: ulfs_inode.c,v 1.22 2019/12/13 20:10:22 ad Exp $ */
+/* $NetBSD: ulfs_inode.c,v 1.23 2019/12/31 22:42:51 ad Exp $ */
/* from NetBSD: ufs_inode.c,v 1.95 2015/06/13 14:56:45 hannken Exp */
/*
@@ -38,7 +38,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ulfs_inode.c,v 1.22 2019/12/13 20:10:22 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ulfs_inode.c,v 1.23 2019/12/31 22:42:51 ad Exp $");
#if defined(_KERNEL_OPT)
#include "opt_lfs.h"
@@ -243,7 +243,9 @@ ulfs_balloc_range(struct vnode *vp, off_
}
pgs[i]->flags &= ~PG_CLEAN;
}
+ uvm_pagelock(pgs[i]);
uvm_pageactivate(pgs[i]);
+ uvm_pageunlock(pgs[i]);
}
uvm_page_unbusy(pgs, npages);
mutex_exit(uobj->vmobjlock);
Index: src/sys/ufs/ufs/ufs_inode.c
diff -u src/sys/ufs/ufs/ufs_inode.c:1.106 src/sys/ufs/ufs/ufs_inode.c:1.107
--- src/sys/ufs/ufs/ufs_inode.c:1.106 Fri Dec 13 20:10:22 2019
+++ src/sys/ufs/ufs/ufs_inode.c Tue Dec 31 22:42:51 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: ufs_inode.c,v 1.106 2019/12/13 20:10:22 ad Exp $ */
+/* $NetBSD: ufs_inode.c,v 1.107 2019/12/31 22:42:51 ad Exp $ */
/*
* Copyright (c) 1991, 1993
@@ -37,7 +37,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ufs_inode.c,v 1.106 2019/12/13 20:10:22 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ufs_inode.c,v 1.107 2019/12/31 22:42:51 ad Exp $");
#if defined(_KERNEL_OPT)
#include "opt_ffs.h"
@@ -279,7 +279,9 @@ ufs_balloc_range(struct vnode *vp, off_t
}
pgs[i]->flags &= ~PG_CLEAN;
}
+ uvm_pagelock(pgs[i]);
uvm_pageactivate(pgs[i]);
+ uvm_pageunlock(pgs[i]);
}
uvm_page_unbusy(pgs, npages);
mutex_exit(uobj->vmobjlock);
Index: src/sys/uvm/uvm.h
diff -u src/sys/uvm/uvm.h:1.72 src/sys/uvm/uvm.h:1.73
--- src/sys/uvm/uvm.h:1.72 Fri Dec 27 13:19:24 2019
+++ src/sys/uvm/uvm.h Tue Dec 31 22:42:51 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm.h,v 1.72 2019/12/27 13:19:24 ad Exp $ */
+/* $NetBSD: uvm.h,v 1.73 2019/12/31 22:42:51 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -87,6 +87,12 @@ struct uvm_cpu {
/* entropy */
krndsource_t rs; /* entropy source */
+
+ /* uvmpdpol: queue of intended page status changes. */
+ struct vm_page **pdq; /* queue entries */
+ u_int pdqhead; /* current queue head */
+ u_int pdqtail; /* maximum number entries */
+ int pdqtime; /* last time queue cleared */
};
/*
Index: src/sys/uvm/uvm_anon.c
diff -u src/sys/uvm/uvm_anon.c:1.69 src/sys/uvm/uvm_anon.c:1.70
--- src/sys/uvm/uvm_anon.c:1.69 Fri Dec 13 20:10:22 2019
+++ src/sys/uvm/uvm_anon.c Tue Dec 31 22:42:51 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_anon.c,v 1.69 2019/12/13 20:10:22 ad Exp $ */
+/* $NetBSD: uvm_anon.c,v 1.70 2019/12/31 22:42:51 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -30,7 +30,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_anon.c,v 1.69 2019/12/13 20:10:22 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_anon.c,v 1.70 2019/12/31 22:42:51 ad Exp $");
#include "opt_uvmhist.h"
@@ -352,7 +352,9 @@ uvm_anon_pagein(struct vm_amap *amap, st
* Deactivate the page (to put it on a page queue).
*/
+ uvm_pagelock(pg);
uvm_pagedeactivate(pg);
+ uvm_pageunlock(pg);
if (pg->flags & PG_WANTED) {
pg->flags &= ~PG_WANTED;
wakeup(pg);
Index: src/sys/uvm/uvm_aobj.c
diff -u src/sys/uvm/uvm_aobj.c:1.132 src/sys/uvm/uvm_aobj.c:1.133
--- src/sys/uvm/uvm_aobj.c:1.132 Sun Dec 15 21:11:35 2019
+++ src/sys/uvm/uvm_aobj.c Tue Dec 31 22:42:51 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_aobj.c,v 1.132 2019/12/15 21:11:35 ad Exp $ */
+/* $NetBSD: uvm_aobj.c,v 1.133 2019/12/31 22:42:51 ad Exp $ */
/*
* Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
@@ -38,7 +38,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_aobj.c,v 1.132 2019/12/15 21:11:35 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_aobj.c,v 1.133 2019/12/31 22:42:51 ad Exp $");
#ifdef _KERNEL_OPT
#include "opt_uvmhist.h"
@@ -738,7 +738,9 @@ uao_put(struct uvm_object *uobj, voff_t
case PGO_CLEANIT|PGO_DEACTIVATE:
case PGO_DEACTIVATE:
deactivate_it:
+ uvm_pagelock(pg);
uvm_pagedeactivate(pg);
+ uvm_pageunlock(pg);
break;
case PGO_FREE:
@@ -1299,7 +1301,9 @@ uao_pagein_page(struct uvm_aobj *aobj, i
/*
* make sure it's on a page queue.
*/
+ uvm_pagelock(pg);
uvm_pageenqueue(pg);
+ uvm_pageunlock(pg);
if (pg->flags & PG_WANTED) {
wakeup(pg);
Index: src/sys/uvm/uvm_bio.c
diff -u src/sys/uvm/uvm_bio.c:1.101 src/sys/uvm/uvm_bio.c:1.102
--- src/sys/uvm/uvm_bio.c:1.101 Fri Dec 13 20:10:22 2019
+++ src/sys/uvm/uvm_bio.c Tue Dec 31 22:42:51 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_bio.c,v 1.101 2019/12/13 20:10:22 ad Exp $ */
+/* $NetBSD: uvm_bio.c,v 1.102 2019/12/31 22:42:51 ad Exp $ */
/*
* Copyright (c) 1998 Chuck Silvers.
@@ -34,7 +34,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_bio.c,v 1.101 2019/12/13 20:10:22 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_bio.c,v 1.102 2019/12/31 22:42:51 ad Exp $");
#include "opt_uvmhist.h"
#include "opt_ubc.h"
@@ -285,7 +285,9 @@ ubc_fault_page(const struct uvm_faultinf
error = pmap_enter(ufi->orig_map->pmap, va, VM_PAGE_TO_PHYS(pg),
prot & mask, PMAP_CANFAIL | (access_type & mask));
+ uvm_pagelock(pg);
uvm_pageactivate(pg);
+ uvm_pageunlock(pg);
pg->flags &= ~(PG_BUSY|PG_WANTED);
UVM_PAGE_OWN(pg, NULL);
@@ -665,7 +667,9 @@ ubc_release(void *va, int flags)
pgs[i] = PHYS_TO_VM_PAGE(pa);
pgs[i]->flags &= ~(PG_FAKE|PG_CLEAN);
KASSERT(pgs[i]->loan_count == 0);
+ uvm_pagelock(pgs[i]);
uvm_pageactivate(pgs[i]);
+ uvm_pageunlock(pgs[i]);
}
pmap_kremove(umapva, ubc_winsize);
pmap_update(pmap_kernel());
@@ -888,7 +892,9 @@ ubc_direct_release(struct uvm_object *uo
for (int i = 0; i < npages; i++) {
struct vm_page *pg = pgs[i];
+ uvm_pagelock(pg);
uvm_pageactivate(pg);
+ uvm_pageunlock(pg);
/* Page was changed, no longer fake and neither clean */
if (flags & UBC_WRITE)
Index: src/sys/uvm/uvm_extern.h
diff -u src/sys/uvm/uvm_extern.h:1.217 src/sys/uvm/uvm_extern.h:1.218
--- src/sys/uvm/uvm_extern.h:1.217 Tue Dec 31 13:07:14 2019
+++ src/sys/uvm/uvm_extern.h Tue Dec 31 22:42:51 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_extern.h,v 1.217 2019/12/31 13:07:14 ad Exp $ */
+/* $NetBSD: uvm_extern.h,v 1.218 2019/12/31 22:42:51 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -639,6 +639,7 @@ int uvm_coredump_walkmap(struct proc *
int uvm_coredump_count_segs(struct proc *);
void uvm_proc_exit(struct proc *);
void uvm_lwp_exit(struct lwp *);
+void uvm_idle(void);
void uvm_init_limits(struct proc *);
bool uvm_kernacc(void *, size_t, vm_prot_t);
__dead void uvm_scheduler(void);
Index: src/sys/uvm/uvm_fault.c
diff -u src/sys/uvm/uvm_fault.c:1.213 src/sys/uvm/uvm_fault.c:1.214
--- src/sys/uvm/uvm_fault.c:1.213 Mon Dec 16 22:47:55 2019
+++ src/sys/uvm/uvm_fault.c Tue Dec 31 22:42:51 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_fault.c,v 1.213 2019/12/16 22:47:55 ad Exp $ */
+/* $NetBSD: uvm_fault.c,v 1.214 2019/12/31 22:42:51 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_fault.c,v 1.213 2019/12/16 22:47:55 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_fault.c,v 1.214 2019/12/31 22:42:51 ad Exp $");
#include "opt_uvmhist.h"
@@ -197,7 +197,9 @@ uvmfault_anonflush(struct vm_anon **anon
KASSERT(mutex_owned(anons[lcv]->an_lock));
pg = anons[lcv]->an_page;
if (pg && (pg->flags & PG_BUSY) == 0) {
+ uvm_pagelock(pg);
uvm_pagedeactivate(pg);
+ uvm_pageunlock(pg);
}
}
}
@@ -482,7 +484,9 @@ released:
* We have successfully read the page, activate it.
*/
+ uvm_pagelock(pg);
uvm_pageactivate(pg);
+ uvm_pageunlock(pg);
pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
UVM_PAGE_OWN(pg, NULL);
#else
@@ -1252,7 +1256,9 @@ uvm_fault_upper_neighbor(
/* locked: amap, anon */
+ uvm_pagelock(pg);
uvm_pageenqueue(pg);
+ uvm_pageunlock(pg);
UVMHIST_LOG(maphist,
" MAPPING: n anon: pm=%#jx, va=%#jx, pg=%#jx",
(uintptr_t)ufi->orig_map->pmap, currva, (uintptr_t)pg, 0);
@@ -1469,7 +1475,9 @@ uvm_fault_upper_promote(
pg = anon->an_page;
/* uvm_fault_upper_done will activate the page */
+ uvm_pagelock(pg);
uvm_pageenqueue(pg);
+ uvm_pageunlock(pg);
pg->flags &= ~(PG_BUSY|PG_FAKE);
UVM_PAGE_OWN(pg, NULL);
@@ -1601,6 +1609,7 @@ uvm_fault_upper_done(
* ... update the page queues.
*/
+ uvm_pagelock(pg);
if (wire_paging) {
uvm_pagewire(pg);
@@ -1615,6 +1624,7 @@ uvm_fault_upper_done(
} else {
uvm_pageactivate(pg);
}
+ uvm_pageunlock(pg);
if (wire_paging) {
uvm_anon_dropswap(anon);
@@ -1833,7 +1843,9 @@ uvm_fault_lower_neighbor(
* for this. we can just directly enter the pages.
*/
+ uvm_pagelock(pg);
uvm_pageenqueue(pg);
+ uvm_pageunlock(pg);
UVMHIST_LOG(maphist,
" MAPPING: n obj: pm=%#jx, va=%#jx, pg=%#jx",
(uintptr_t)ufi->orig_map->pmap, currva, (uintptr_t)pg, 0);
@@ -1950,7 +1962,9 @@ uvm_fault_lower_io(
mutex_enter(uobj->vmobjlock);
KASSERT((pg->flags & PG_BUSY) != 0);
+ uvm_pagelock(pg);
uvm_pageactivate(pg);
+ uvm_pageunlock(pg);
/* locked(locked): maps(read), amap(if !null), uobj, pg */
/* locked(!locked): uobj, pg */
@@ -2249,7 +2263,9 @@ uvm_fault_lower_enter(
* we just promoted the page.
*/
+ uvm_pagelock(pg);
uvm_pageenqueue(pg);
+ uvm_pageunlock(pg);
if (pg->flags & PG_WANTED)
wakeup(pg);
@@ -2308,6 +2324,7 @@ uvm_fault_lower_done(
UVMHIST_FUNC("uvm_fault_lower_done"); UVMHIST_CALLED(maphist);
+ uvm_pagelock(pg);
if (flt->wire_paging) {
uvm_pagewire(pg);
if (pg->flags & PG_AOBJ) {
@@ -2326,6 +2343,7 @@ uvm_fault_lower_done(
} else {
uvm_pageactivate(pg);
}
+ uvm_pageunlock(pg);
if (dropswap) {
uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
@@ -2455,8 +2473,11 @@ uvm_fault_unwire_locked(struct vm_map *m
pmap_unwire(pmap, va);
pg = PHYS_TO_VM_PAGE(pa);
- if (pg)
+ if (pg) {
+ uvm_pagelock(pg);
uvm_pageunwire(pg);
+ uvm_pageunlock(pg);
+ }
}
if (oentry != NULL) {
Index: src/sys/uvm/uvm_glue.c
diff -u src/sys/uvm/uvm_glue.c:1.174 src/sys/uvm/uvm_glue.c:1.175
--- src/sys/uvm/uvm_glue.c:1.174 Tue Dec 31 13:07:14 2019
+++ src/sys/uvm/uvm_glue.c Tue Dec 31 22:42:51 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_glue.c,v 1.174 2019/12/31 13:07:14 ad Exp $ */
+/* $NetBSD: uvm_glue.c,v 1.175 2019/12/31 22:42:51 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -62,7 +62,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_glue.c,v 1.174 2019/12/31 13:07:14 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_glue.c,v 1.175 2019/12/31 22:42:51 ad Exp $");
#include "opt_kgdb.h"
#include "opt_kstack.h"
@@ -86,6 +86,7 @@ __KERNEL_RCSID(0, "$NetBSD: uvm_glue.c,v
#include <sys/asan.h>
#include <uvm/uvm.h>
+#include <uvm/uvm_pdpolicy.h>
#include <uvm/uvm_pgflcache.h>
/*
@@ -516,3 +517,22 @@ uvm_scheduler(void)
(void)kpause("uvm", false, hz, NULL);
}
}
+
+/*
+ * uvm_idle: called from the idle loop.
+ */
+
+void
+uvm_idle(void)
+{
+ struct cpu_info *ci = curcpu();
+ struct uvm_cpu *ucpu = ci->ci_data.cpu_uvm;
+
+ KASSERT(kpreempt_disabled());
+
+ if (!ci->ci_want_resched)
+ uvmpdpol_idle(ucpu);
+ if (!ci->ci_want_resched)
+ uvm_pageidlezero();
+
+}
Index: src/sys/uvm/uvm_loan.c
diff -u src/sys/uvm/uvm_loan.c:1.92 src/sys/uvm/uvm_loan.c:1.93
--- src/sys/uvm/uvm_loan.c:1.92 Wed Dec 18 20:38:14 2019
+++ src/sys/uvm/uvm_loan.c Tue Dec 31 22:42:51 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_loan.c,v 1.92 2019/12/18 20:38:14 ad Exp $ */
+/* $NetBSD: uvm_loan.c,v 1.93 2019/12/31 22:42:51 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.92 2019/12/18 20:38:14 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_loan.c,v 1.93 2019/12/31 22:42:51 ad Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -421,11 +421,11 @@ uvm_loananon(struct uvm_faultinfo *ufi,
if (pg->loan_count == 0) {
pmap_page_protect(pg, VM_PROT_READ);
}
- mutex_enter(&pg->interlock);
+ uvm_pagelock(pg);
pg->loan_count++;
KASSERT(pg->loan_count > 0); /* detect wrap-around */
- mutex_exit(&pg->interlock);
uvm_pageactivate(pg);
+ uvm_pageunlock(pg);
**output = pg;
(*output)++;
@@ -471,11 +471,11 @@ uvm_loanpage(struct vm_page **pgpp, int
if (pg->loan_count == 0) {
pmap_page_protect(pg, VM_PROT_READ);
}
- mutex_enter(&pg->interlock);
+ uvm_pagelock(pg);
pg->loan_count++;
KASSERT(pg->loan_count > 0); /* detect wrap-around */
- mutex_exit(&pg->interlock);
uvm_pageactivate(pg);
+ uvm_pageunlock(pg);
}
uvm_page_unbusy(pgpp, npages);
@@ -713,7 +713,9 @@ uvm_loanuobj(struct uvm_faultinfo *ufi,
mutex_exit(uobj->vmobjlock);
return (0);
}
+ uvm_pagelock(pg);
uvm_pageactivate(pg);
+ uvm_pageunlock(pg);
pg->flags &= ~(PG_BUSY|PG_WANTED);
UVM_PAGE_OWN(pg, NULL);
mutex_exit(uobj->vmobjlock);
@@ -778,14 +780,14 @@ uvm_loanuobj(struct uvm_faultinfo *ufi,
if (pg->loan_count == 0) {
pmap_page_protect(pg, VM_PROT_READ);
}
- mutex_enter(&pg->interlock);
+ uvm_pagelock(pg);
pg->loan_count++;
KASSERT(pg->loan_count > 0); /* detect wrap-around */
pg->uanon = anon;
anon->an_page = pg;
anon->an_lock = /* TODO: share amap lock */
- mutex_exit(&pg->interlock);
uvm_pageactivate(pg);
+ uvm_pageunlock(pg);
if (pg->flags & PG_WANTED) {
wakeup(pg);
}
@@ -863,7 +865,9 @@ again:
/* got a zero'd page. */
pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
pg->flags |= PG_RDONLY;
+ uvm_pagelock(pg);
uvm_pageactivate(pg);
+ uvm_pageunlock(pg);
UVM_PAGE_OWN(pg, NULL);
}
@@ -909,11 +913,11 @@ again:
}
anon->an_page = pg;
pg->uanon = anon;
- mutex_enter(&pg->interlock);
+ uvm_pagelock(pg);
pg->loan_count++;
KASSERT(pg->loan_count > 0); /* detect wrap-around */
- mutex_exit(&pg->interlock);
uvm_pageactivate(pg);
+ uvm_pageunlock(pg);
mutex_exit(&anon->an_lock);
mutex_exit(uvm_loanzero_object.vmobjlock);
**output = anon;
@@ -1063,11 +1067,13 @@ ulz_put(struct uvm_object *uobj, voff_t
pg = uvm_pagelookup(uobj, 0);
KASSERT(pg != NULL);
+ uvm_pagelock(pg);
if (pg->uanon) {
uvm_pageactivate(pg);
} else {
uvm_pagedequeue(pg);
}
+ uvm_pageunlock(pg);
mutex_exit(uobj->vmobjlock);
return 0;
@@ -1147,6 +1153,8 @@ uvm_loanbreak(struct vm_page *uobjpage)
* an anon (i.e. we are breaking an O->K
* loan), then remove it from any pageq's.
*/
+
+ uvm_pagelock2(uobjpage, pg);
if (uobjpage->uanon == NULL)
uvm_pagedequeue(uobjpage);
@@ -1162,6 +1170,7 @@ uvm_loanbreak(struct vm_page *uobjpage)
*/
uvm_pageactivate(pg);
+ uvm_pageunlock2(uobjpage, pg);
/*
* done! loan is broken and "pg" is
@@ -1186,6 +1195,13 @@ uvm_loanbreak_anon(struct vm_anon *anon,
}
oldpg = anon->an_page;
+ /* copy old -> new */
+ uvm_pagecopy(oldpg, newpg);
+
+ /* force reload */
+ pmap_page_protect(oldpg, VM_PROT_NONE);
+
+ uvm_pagelock2(oldpg, newpg);
if (uobj == NULL) {
/*
* we were the lender (A->K); need to remove the page from
@@ -1193,19 +1209,6 @@ uvm_loanbreak_anon(struct vm_anon *anon,
*/
uvm_pagedequeue(oldpg);
}
-
- /* copy old -> new */
- uvm_pagecopy(oldpg, newpg);
-
- /* force reload */
- pmap_page_protect(oldpg, VM_PROT_NONE);
- if (newpg < oldpg) {
- mutex_enter(&newpg->interlock);
- mutex_enter(&oldpg->interlock);
- } else {
- mutex_enter(&oldpg->interlock);
- mutex_enter(&newpg->interlock);
- }
oldpg->uanon = NULL;
/* in case we owned */
oldpg->flags &= ~PG_ANON;
@@ -1220,9 +1223,8 @@ uvm_loanbreak_anon(struct vm_anon *anon,
newpg->uanon = anon;
newpg->flags |= PG_ANON;
- mutex_exit(&newpg->interlock);
- mutex_exit(&oldpg->interlock);
uvm_pageactivate(newpg);
+ uvm_pageunlock2(oldpg, newpg);
newpg->flags &= ~(PG_BUSY|PG_FAKE);
UVM_PAGE_OWN(newpg, NULL);
Index: src/sys/uvm/uvm_page.h
diff -u src/sys/uvm/uvm_page.h:1.92 src/sys/uvm/uvm_page.h:1.93
--- src/sys/uvm/uvm_page.h:1.92 Tue Dec 31 17:56:16 2019
+++ src/sys/uvm/uvm_page.h Tue Dec 31 22:42:51 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_page.h,v 1.92 2019/12/31 17:56:16 ad Exp $ */
+/* $NetBSD: uvm_page.h,v 1.93 2019/12/31 22:42:51 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -266,6 +266,24 @@ struct vm_page {
"\11AOBJ\12AOBJ\13READAHEAD\14FREE\15MARKER\16PAGER1\17ZERO"
/*
+ * uvmpdpol state flags.
+ *
+ * => may only be changed with pg->interlock held.
+ * => changing them is the responsibility of uvmpdpol ..
+ * => .. but uvm_page needs to know about them in order to purge updates.
+ * => PQ_PRIVATE is private to the individual uvmpdpol implementation.
+ */
+
+#define PQ_INTENT_A 0x00000000 /* intend activation */
+#define PQ_INTENT_I 0x00000001 /* intend deactivation */
+#define PQ_INTENT_E 0x00000002 /* intend enqueue */
+#define PQ_INTENT_D 0x00000003 /* intend dequeue */
+#define PQ_INTENT_MASK 0x00000003 /* mask of intended state */
+#define PQ_INTENT_SET 0x00000004 /* not realized yet */
+#define PQ_INTENT_QUEUED 0x00000008 /* queued for processing */
+#define PQ_PRIVATE 0xfffffff0
+
+/*
* physical memory layout structure
*
* MD vmparam.h must #define:
@@ -312,6 +330,10 @@ void uvm_pagedeactivate(struct vm_page *
void uvm_pagedequeue(struct vm_page *);
void uvm_pageenqueue(struct vm_page *);
void uvm_pagefree(struct vm_page *);
+void uvm_pagelock(struct vm_page *);
+void uvm_pagelock2(struct vm_page *, struct vm_page *);
+void uvm_pageunlock(struct vm_page *);
+void uvm_pageunlock2(struct vm_page *, struct vm_page *);
void uvm_page_unbusy(struct vm_page **, int);
struct vm_page *uvm_pagelookup(struct uvm_object *, voff_t);
void uvm_pageunwire(struct vm_page *);
Index: src/sys/uvm/uvm_map.c
diff -u src/sys/uvm/uvm_map.c:1.368 src/sys/uvm/uvm_map.c:1.369
--- src/sys/uvm/uvm_map.c:1.368 Fri Dec 27 10:17:57 2019
+++ src/sys/uvm/uvm_map.c Tue Dec 31 22:42:51 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_map.c,v 1.368 2019/12/27 10:17:57 msaitoh Exp $ */
+/* $NetBSD: uvm_map.c,v 1.369 2019/12/31 22:42:51 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -66,7 +66,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.368 2019/12/27 10:17:57 msaitoh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.369 2019/12/31 22:42:51 ad Exp $");
#include "opt_ddb.h"
#include "opt_pax.h"
@@ -3949,7 +3949,9 @@ uvm_map_clean(struct vm_map *map, vaddr_
continue;
}
KASSERT(pg->uanon == anon);
+ uvm_pagelock(pg);
uvm_pagedeactivate(pg);
+ uvm_pageunlock(pg);
continue;
case PGO_FREE:
Index: src/sys/uvm/uvm_object.c
diff -u src/sys/uvm/uvm_object.c:1.18 src/sys/uvm/uvm_object.c:1.19
--- src/sys/uvm/uvm_object.c:1.18 Sun Dec 15 21:11:35 2019
+++ src/sys/uvm/uvm_object.c Tue Dec 31 22:42:51 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_object.c,v 1.18 2019/12/15 21:11:35 ad Exp $ */
+/* $NetBSD: uvm_object.c,v 1.19 2019/12/31 22:42:51 ad Exp $ */
/*
* Copyright (c) 2006, 2010, 2019 The NetBSD Foundation, Inc.
@@ -37,7 +37,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_object.c,v 1.18 2019/12/15 21:11:35 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_object.c,v 1.19 2019/12/31 22:42:51 ad Exp $");
#ifdef _KERNEL_OPT
#include "opt_ddb.h"
@@ -181,7 +181,9 @@ uvm_obj_wirepages(struct uvm_object *uob
/* Wire the pages */
for (i = 0; i < npages; i++) {
+ uvm_pagelock(pgs[i]);
uvm_pagewire(pgs[i]);
+ uvm_pageunlock(pgs[i]);
if (list != NULL)
TAILQ_INSERT_TAIL(list, pgs[i], pageq.queue);
}
@@ -223,7 +225,9 @@ uvm_obj_unwirepages(struct uvm_object *u
KASSERT(pg != NULL);
KASSERT(!(pg->flags & PG_RELEASED));
+ uvm_pagelock(pg);
uvm_pageunwire(pg);
+ uvm_pageunlock(pg);
}
mutex_exit(uobj->vmobjlock);
}
Index: src/sys/uvm/uvm_page.c
diff -u src/sys/uvm/uvm_page.c:1.219 src/sys/uvm/uvm_page.c:1.220
--- src/sys/uvm/uvm_page.c:1.219 Tue Dec 31 13:07:14 2019
+++ src/sys/uvm/uvm_page.c Tue Dec 31 22:42:51 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_page.c,v 1.219 2019/12/31 13:07:14 ad Exp $ */
+/* $NetBSD: uvm_page.c,v 1.220 2019/12/31 22:42:51 ad Exp $ */
/*-
* Copyright (c) 2019 The NetBSD Foundation, Inc.
@@ -95,7 +95,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.219 2019/12/31 13:07:14 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_page.c,v 1.220 2019/12/31 22:42:51 ad Exp $");
#include "opt_ddb.h"
#include "opt_uvm.h"
@@ -984,6 +984,8 @@ uvm_cpu_attach(struct cpu_info *ci)
ucpu = ci->ci_data.cpu_uvm;
}
+ uvmpdpol_init_cpu(ucpu);
+
/*
* Attach RNG source for this CPU's VM events
*/
@@ -1345,6 +1347,7 @@ uvm_pagealloc_strat(struct uvm_object *o
* uvm_pagereplace: replace a page with another
*
* => object must be locked
+ * => page interlocks must be held
*/
void
@@ -1358,25 +1361,17 @@ uvm_pagereplace(struct vm_page *oldpg, s
KASSERT((newpg->flags & PG_TABLED) == 0);
KASSERT(newpg->uobject == NULL);
KASSERT(mutex_owned(uobj->vmobjlock));
+ KASSERT(mutex_owned(&oldpg->interlock));
+ KASSERT(mutex_owned(&newpg->interlock));
newpg->offset = oldpg->offset;
pg = radix_tree_replace_node(&uobj->uo_pages,
newpg->offset >> PAGE_SHIFT, newpg);
KASSERT(pg == oldpg);
- /* take page interlocks during rename */
- if (oldpg < newpg) {
- mutex_enter(&oldpg->interlock);
- mutex_enter(&newpg->interlock);
- } else {
- mutex_enter(&newpg->interlock);
- mutex_enter(&oldpg->interlock);
- }
newpg->uobject = uobj;
uvm_pageinsert_object(uobj, newpg);
uvm_pageremove_object(uobj, oldpg);
- mutex_exit(&oldpg->interlock);
- mutex_exit(&newpg->interlock);
}
/*
@@ -1502,7 +1497,7 @@ uvm_pagefree(struct vm_page *pg)
* unbusy the page, and we're done.
*/
- mutex_enter(&pg->interlock);
+ uvm_pagelock(pg);
locked = true;
if (pg->uobject != NULL) {
uvm_pageremove_object(pg->uobject, pg);
@@ -1526,15 +1521,15 @@ uvm_pagefree(struct vm_page *pg)
#endif
if (pg->loan_count) {
KASSERT(pg->uobject == NULL);
- mutex_exit(&pg->interlock);
if (pg->uanon == NULL) {
uvm_pagedequeue(pg);
}
+ uvm_pageunlock(pg);
return;
}
} else if (pg->uobject != NULL || pg->uanon != NULL ||
pg->wire_count != 0) {
- mutex_enter(&pg->interlock);
+ uvm_pagelock(pg);
locked = true;
} else {
locked = false;
@@ -1560,15 +1555,16 @@ uvm_pagefree(struct vm_page *pg)
atomic_dec_uint(&uvmexp.wired);
}
if (locked) {
- mutex_exit(&pg->interlock);
+ /*
+ * now remove the page from the queues.
+ */
+ uvm_pagedequeue(pg);
+ uvm_pageunlock(pg);
+ } else {
+ KASSERT(!uvmpdpol_pageisqueued_p(pg));
}
/*
- * now remove the page from the queues.
- */
- uvm_pagedequeue(pg);
-
- /*
* and put on free queue
*/
@@ -1744,6 +1740,7 @@ uvm_pagelookup(struct uvm_object *obj, v
* uvm_pagewire: wire the page, thus removing it from the daemon's grasp
*
* => caller must lock objects
+ * => caller must hold pg->interlock
*/
void
@@ -1751,6 +1748,7 @@ uvm_pagewire(struct vm_page *pg)
{
KASSERT(uvm_page_owner_locked_p(pg));
+ KASSERT(mutex_owned(&pg->interlock));
#if defined(READAHEAD_STATS)
if ((pg->flags & PG_READAHEAD) != 0) {
uvm_ra_hit.ev_count++;
@@ -1761,9 +1759,7 @@ uvm_pagewire(struct vm_page *pg)
uvm_pagedequeue(pg);
atomic_inc_uint(&uvmexp.wired);
}
- mutex_enter(&pg->interlock);
pg->wire_count++;
- mutex_exit(&pg->interlock);
KASSERT(pg->wire_count > 0); /* detect wraparound */
}
@@ -1772,6 +1768,7 @@ uvm_pagewire(struct vm_page *pg)
*
* => activate if wire count goes to zero.
* => caller must lock objects
+ * => caller must hold pg->interlock
*/
void
@@ -1781,9 +1778,8 @@ uvm_pageunwire(struct vm_page *pg)
KASSERT(uvm_page_owner_locked_p(pg));
KASSERT(pg->wire_count != 0);
KASSERT(!uvmpdpol_pageisqueued_p(pg));
- mutex_enter(&pg->interlock);
+ KASSERT(mutex_owned(&pg->interlock));
pg->wire_count--;
- mutex_exit(&pg->interlock);
if (pg->wire_count == 0) {
uvm_pageactivate(pg);
KASSERT(uvmexp.wired != 0);
@@ -1798,6 +1794,7 @@ uvm_pageunwire(struct vm_page *pg)
* => caller must check to make sure page is not wired
* => object that page belongs to must be locked (so we can adjust pg->flags)
* => caller must clear the reference on the page before calling
+ * => caller must hold pg->interlock
*/
void
@@ -1805,6 +1802,7 @@ uvm_pagedeactivate(struct vm_page *pg)
{
KASSERT(uvm_page_owner_locked_p(pg));
+ KASSERT(mutex_owned(&pg->interlock));
if (pg->wire_count == 0) {
KASSERT(uvmpdpol_pageisqueued_p(pg));
uvmpdpol_pagedeactivate(pg);
@@ -1815,6 +1813,7 @@ uvm_pagedeactivate(struct vm_page *pg)
* uvm_pageactivate: activate page
*
* => caller must lock objects
+ * => caller must hold pg->interlock
*/
void
@@ -1822,6 +1821,7 @@ uvm_pageactivate(struct vm_page *pg)
{
KASSERT(uvm_page_owner_locked_p(pg));
+ KASSERT(mutex_owned(&pg->interlock));
#if defined(READAHEAD_STATS)
if ((pg->flags & PG_READAHEAD) != 0) {
uvm_ra_hit.ev_count++;
@@ -1837,12 +1837,14 @@ uvm_pageactivate(struct vm_page *pg)
* uvm_pagedequeue: remove a page from any paging queue
*
* => caller must lock objects
+ * => caller must hold pg->interlock
*/
void
uvm_pagedequeue(struct vm_page *pg)
{
KASSERT(uvm_page_owner_locked_p(pg));
+ KASSERT(mutex_owned(&pg->interlock));
if (uvmpdpol_pageisqueued_p(pg)) {
uvmpdpol_pagedequeue(pg);
}
@@ -1853,18 +1855,103 @@ uvm_pagedequeue(struct vm_page *pg)
* used where a page is not really demanded (yet). eg. read-ahead
*
* => caller must lock objects
+ * => caller must hold pg->interlock
*/
void
uvm_pageenqueue(struct vm_page *pg)
{
KASSERT(uvm_page_owner_locked_p(pg));
+ KASSERT(mutex_owned(&pg->interlock));
if (pg->wire_count == 0 && !uvmpdpol_pageisqueued_p(pg)) {
uvmpdpol_pageenqueue(pg);
}
}
/*
+ * uvm_pagelock: acquire page interlock
+ */
+void
+uvm_pagelock(struct vm_page *pg)
+{
+ /* take pg->interlock; release it with uvm_pageunlock() so any intent set meanwhile is passed on */
+ mutex_enter(&pg->interlock);
+}
+
+/*
+ * uvm_pagelock2: acquire two page interlocks
+ */
+void
+uvm_pagelock2(struct vm_page *pg1, struct vm_page *pg2)
+{
+ /* lower-addressed page is always locked first, regardless of argument order; pg1 == pg2 would enter the same mutex twice, so presumably callers pass distinct pages (TODO confirm) */
+ if (pg1 < pg2) {
+ mutex_enter(&pg1->interlock);
+ mutex_enter(&pg2->interlock);
+ } else {
+ mutex_enter(&pg2->interlock);
+ mutex_enter(&pg1->interlock);
+ }
+}
+
+/*
+ * uvm_pageunlock: release page interlock, and if a page replacement intent
+ * is set on the page, pass it to uvmpdpol to make real.
+ *
+ * => caller must hold pg->interlock
+ */
+void
+uvm_pageunlock(struct vm_page *pg)
+{
+ /* just unlock if no intent is set, or if the page is already flagged PQ_INTENT_QUEUED */
+ if ((pg->pqflags & PQ_INTENT_SET) == 0 ||
+ (pg->pqflags & PQ_INTENT_QUEUED) != 0) {
+ mutex_exit(&pg->interlock);
+ return;
+ }
+ pg->pqflags |= PQ_INTENT_QUEUED;
+ mutex_exit(&pg->interlock);
+ uvmpdpol_pagerealize(pg);
+}
+
+/*
+ * uvm_pageunlock2: release two page interlocks, and for both pages if a
+ * page replacement intent is set on the page, pass it to uvmpdpol to make
+ * real.
+ *
+ * => caller must hold both pg1->interlock and pg2->interlock
+ */
+void
+uvm_pageunlock2(struct vm_page *pg1, struct vm_page *pg2)
+{
+ /* pg1: mark as queued while still locked if it has an unqueued intent, else forget it (NULL) */
+ if ((pg1->pqflags & PQ_INTENT_SET) == 0 ||
+ (pg1->pqflags & PQ_INTENT_QUEUED) != 0) {
+ mutex_exit(&pg1->interlock);
+ pg1 = NULL;
+ } else {
+ pg1->pqflags |= PQ_INTENT_QUEUED;
+ mutex_exit(&pg1->interlock);
+ }
+ /* pg2: same treatment */
+ if ((pg2->pqflags & PQ_INTENT_SET) == 0 ||
+ (pg2->pqflags & PQ_INTENT_QUEUED) != 0) {
+ mutex_exit(&pg2->interlock);
+ pg2 = NULL;
+ } else {
+ pg2->pqflags |= PQ_INTENT_QUEUED;
+ mutex_exit(&pg2->interlock);
+ }
+ /* both interlocks are released by now; hand over whichever pages were marked above */
+ if (pg1 != NULL) {
+ uvmpdpol_pagerealize(pg1);
+ }
+ if (pg2 != NULL) {
+ uvmpdpol_pagerealize(pg2);
+ }
+}
+
+/*
* uvm_pagezero: zero fill a page
*
* => if page is part of an object then the object should be locked
Index: src/sys/uvm/uvm_pager.c
diff -u src/sys/uvm/uvm_pager.c:1.118 src/sys/uvm/uvm_pager.c:1.119
--- src/sys/uvm/uvm_pager.c:1.118 Fri Dec 27 00:46:38 2019
+++ src/sys/uvm/uvm_pager.c Tue Dec 31 22:42:51 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_pager.c,v 1.118 2019/12/27 00:46:38 ad Exp $ */
+/* $NetBSD: uvm_pager.c,v 1.119 2019/12/31 22:42:51 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_pager.c,v 1.118 2019/12/27 00:46:38 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_pager.c,v 1.119 2019/12/31 22:42:51 ad Exp $");
#include "opt_uvmhist.h"
#include "opt_readahead.h"
@@ -387,7 +387,9 @@ uvm_aio_aiodone_pages(struct vm_page **p
pageout_done++;
}
pg->flags &= ~PG_CLEAN;
+ uvm_pagelock(pg);
uvm_pageactivate(pg);
+ uvm_pageunlock(pg);
slot = 0;
} else
slot = SWSLOT_BAD;
@@ -423,7 +425,9 @@ uvm_aio_aiodone_pages(struct vm_page **p
uvm_ra_total.ev_count++;
#endif /* defined(READAHEAD_STATS) */
KASSERT((pg->flags & PG_CLEAN) != 0);
+ uvm_pagelock(pg);
uvm_pageenqueue(pg);
+ uvm_pageunlock(pg);
pmap_clear_modify(pg);
}
Index: src/sys/uvm/uvm_pdaemon.c
diff -u src/sys/uvm/uvm_pdaemon.c:1.121 src/sys/uvm/uvm_pdaemon.c:1.122
--- src/sys/uvm/uvm_pdaemon.c:1.121 Tue Dec 31 13:07:14 2019
+++ src/sys/uvm/uvm_pdaemon.c Tue Dec 31 22:42:51 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_pdaemon.c,v 1.121 2019/12/31 13:07:14 ad Exp $ */
+/* $NetBSD: uvm_pdaemon.c,v 1.122 2019/12/31 22:42:51 ad Exp $ */
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -66,7 +66,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_pdaemon.c,v 1.121 2019/12/31 13:07:14 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_pdaemon.c,v 1.122 2019/12/31 22:42:51 ad Exp $");
#include "opt_uvmhist.h"
#include "opt_readahead.h"
@@ -818,7 +818,9 @@ uvmpd_scan_queue(void)
if (swapcluster_allocslots(&swc)) {
dirtyreacts++;
+ uvm_pagelock(p);
uvm_pageactivate(p);
+ uvm_pageunlock(p);
mutex_exit(slock);
continue;
}
@@ -836,7 +838,9 @@ uvmpd_scan_queue(void)
p->flags |= PG_PAGEOUT;
uvmexp.pgswapout++;
+ uvm_pagelock(p);
uvm_pagedequeue(p);
+ uvm_pageunlock(p);
/*
* add the new page to the cluster.
@@ -846,7 +850,9 @@ uvmpd_scan_queue(void)
p->flags &= ~(PG_BUSY|PG_PAGEOUT);
UVM_PAGE_OWN(p, NULL);
dirtyreacts++;
+ uvm_pagelock(p);
uvm_pageactivate(p);
+ uvm_pageunlock(p);
mutex_exit(slock);
continue;
}
@@ -862,7 +868,9 @@ uvmpd_scan_queue(void)
atomic_inc_uint(&uvmexp.pdpending);
#else /* defined(VMSWAP) */
+ uvm_pagelock(p);
uvm_pageactivate(p);
+ uvm_pageunlock(p);
mutex_exit(slock);
#endif /* defined(VMSWAP) */
}
Index: src/sys/uvm/uvm_pdpolicy.h
diff -u src/sys/uvm/uvm_pdpolicy.h:1.5 src/sys/uvm/uvm_pdpolicy.h:1.6
--- src/sys/uvm/uvm_pdpolicy.h:1.5 Mon Dec 30 18:08:38 2019
+++ src/sys/uvm/uvm_pdpolicy.h Tue Dec 31 22:42:51 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_pdpolicy.h,v 1.5 2019/12/30 18:08:38 ad Exp $ */
+/* $NetBSD: uvm_pdpolicy.h,v 1.6 2019/12/31 22:42:51 ad Exp $ */
/*-
* Copyright (c)2005, 2006 YAMAMOTO Takashi,
@@ -37,7 +37,9 @@ struct vm_anon;
* don't use them directly from outside of /sys/uvm.
*/
+void uvmpdpol_idle(struct uvm_cpu *);
void uvmpdpol_init(void);
+void uvmpdpol_init_cpu(struct uvm_cpu *);
void uvmpdpol_reinit(void);
void uvmpdpol_estimatepageable(int *, int *);
bool uvmpdpol_needsscan_p(void);
@@ -47,6 +49,7 @@ void uvmpdpol_pagedeactivate(struct vm_p
void uvmpdpol_pagedequeue(struct vm_page *);
void uvmpdpol_pageenqueue(struct vm_page *);
bool uvmpdpol_pageisqueued_p(struct vm_page *);
+void uvmpdpol_pagerealize(struct vm_page *);
void uvmpdpol_anfree(struct vm_anon *);
void uvmpdpol_tune(void);
@@ -57,4 +60,17 @@ void uvmpdpol_balancequeue(int);
void uvmpdpol_sysctlsetup(void);
+/*
+ * uvmpdpol_set_intent: set an intended state for the page, taking care not
+ * to overwrite any of the other flags.
+ */
+
+static inline void
+uvmpdpol_set_intent(struct vm_page *pg, uint32_t i)
+{
+ /* clear the PQ_INTENT_MASK bits, install intent i and flag PQ_INTENT_SET; all other pqflags bits are kept */
+ KASSERT(mutex_owned(&pg->interlock));
+ pg->pqflags = PQ_INTENT_SET | (pg->pqflags & ~PQ_INTENT_MASK) | i;
+}
+
#endif /* !_UVM_PDPOLICY_H_ */
Index: src/sys/uvm/uvm_pdpolicy_clock.c
diff -u src/sys/uvm/uvm_pdpolicy_clock.c:1.27 src/sys/uvm/uvm_pdpolicy_clock.c:1.28
--- src/sys/uvm/uvm_pdpolicy_clock.c:1.27 Tue Dec 31 13:07:14 2019
+++ src/sys/uvm/uvm_pdpolicy_clock.c Tue Dec 31 22:42:51 2019
@@ -1,6 +1,35 @@
-/* $NetBSD: uvm_pdpolicy_clock.c,v 1.27 2019/12/31 13:07:14 ad Exp $ */
+/* $NetBSD: uvm_pdpolicy_clock.c,v 1.28 2019/12/31 22:42:51 ad Exp $ */
/* NetBSD: uvm_pdaemon.c,v 1.72 2006/01/05 10:47:33 yamt Exp $ */
+/*-
+ * Copyright (c) 2019 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Andrew Doran.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
/*
* Copyright (c) 1997 Charles D. Cranor and Washington University.
* Copyright (c) 1991, 1993, The Regents of the University of California.
@@ -69,12 +98,13 @@
#else /* defined(PDSIM) */
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clock.c,v 1.27 2019/12/31 13:07:14 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clock.c,v 1.28 2019/12/31 22:42:51 ad Exp $");
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
+#include <sys/kmem.h>
#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>
@@ -83,9 +113,19 @@ __KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy
#endif /* defined(PDSIM) */
-#define PQ_TIME 0xfffffffc /* time of last activation */
-#define PQ_INACTIVE 0x00000001 /* page is in inactive list */
-#define PQ_ACTIVE 0x00000002 /* page is in active list */
+/*
+ * per-CPU queue of pending page status changes. 128 entries makes for a
+ * 1kB queue on _LP64 and has been found to be a reasonable compromise that
+ * keeps lock contention events and wait times low, while not using too much
+ * memory nor allowing global state to fall too far behind.
+ */
+#if !defined(CLOCK_PDQ_SIZE)
+#define CLOCK_PDQ_SIZE 128
+#endif /* !defined(CLOCK_PDQ_SIZE) */
+
+#define PQ_TIME 0xffffffc0 /* time of last activation */
+#define PQ_INACTIVE 0x00000010 /* page is in inactive list */
+#define PQ_ACTIVE 0x00000020 /* page is in active list */
#if !defined(CLOCK_INACTIVEPCT)
#define CLOCK_INACTIVEPCT 33
@@ -117,6 +157,8 @@ struct uvmpdpol_scanstate {
static void uvmpdpol_pageactivate_locked(struct vm_page *);
static void uvmpdpol_pagedeactivate_locked(struct vm_page *);
static void uvmpdpol_pagedequeue_locked(struct vm_page *);
+static bool uvmpdpol_pagerealize_locked(struct vm_page *);
+static struct uvm_cpu *uvmpdpol_flush(void);
static struct uvmpdpol_globalstate pdpol_state __cacheline_aligned;
static struct uvmpdpol_scanstate pdpol_scanstate;
@@ -216,15 +258,12 @@ uvmpdpol_selectvictim(kmutex_t **plock)
/*
* acquire interlock to stablize page identity.
* if we have caught the page in a state of flux
- * and it should be dequeued, do it now and then
- * move on to the next.
+ * deal with it and retry.
*/
mutex_enter(&pg->interlock);
- if ((pg->uobject == NULL && pg->uanon == NULL) ||
- pg->wire_count > 0) {
- mutex_exit(&pg->interlock);
- uvmpdpol_pagedequeue_locked(pg);
- continue;
+ if (uvmpdpol_pagerealize_locked(pg)) {
+ mutex_exit(&pg->interlock);
+ continue;
}
/*
@@ -245,21 +284,21 @@ uvmpdpol_selectvictim(kmutex_t **plock)
anon = pg->uanon;
uobj = pg->uobject;
if (uobj && UVM_OBJ_IS_VTEXT(uobj) && ss->ss_execreact) {
- mutex_exit(&pg->interlock);
uvmpdpol_pageactivate_locked(pg);
+ mutex_exit(&pg->interlock);
PDPOL_EVCNT_INCR(reactexec);
continue;
}
if (uobj && UVM_OBJ_IS_VNODE(uobj) &&
!UVM_OBJ_IS_VTEXT(uobj) && ss->ss_filereact) {
- mutex_exit(&pg->interlock);
uvmpdpol_pageactivate_locked(pg);
+ mutex_exit(&pg->interlock);
PDPOL_EVCNT_INCR(reactfile);
continue;
}
if ((anon || UVM_OBJ_IS_AOBJ(uobj)) && ss->ss_anonreact) {
- mutex_exit(&pg->interlock);
uvmpdpol_pageactivate_locked(pg);
+ mutex_exit(&pg->interlock);
PDPOL_EVCNT_INCR(reactanon);
continue;
}
@@ -292,7 +331,9 @@ uvmpdpol_selectvictim(kmutex_t **plock)
* next page.
*/
if (pmap_is_referenced(pg)) {
+ mutex_enter(&pg->interlock);
uvmpdpol_pageactivate_locked(pg);
+ mutex_exit(&pg->interlock);
uvmexp.pdreact++;
mutex_exit(lock);
continue;
@@ -339,15 +380,12 @@ uvmpdpol_balancequeue(int swap_shortage)
/*
* acquire interlock to stablize page identity.
* if we have caught the page in a state of flux
- * and it should be dequeued, do it now and then
- * move on to the next.
+ * deal with it and retry.
*/
mutex_enter(&p->interlock);
- if ((p->uobject == NULL && p->uanon == NULL) ||
- p->wire_count > 0) {
- mutex_exit(&p->interlock);
- uvmpdpol_pagedequeue_locked(p);
- continue;
+ if (uvmpdpol_pagerealize_locked(p)) {
+ mutex_exit(&p->interlock);
+ continue;
}
/*
@@ -384,7 +422,10 @@ uvmpdpol_balancequeue(int swap_shortage)
* if there's a shortage of inactive pages, deactivate.
*/
if (inactive_shortage > 0) {
+ pmap_clear_reference(p);
+ mutex_enter(&p->interlock);
uvmpdpol_pagedeactivate_locked(p);
+ mutex_exit(&p->interlock);
uvmexp.pddeact++;
inactive_shortage--;
}
@@ -397,93 +438,118 @@ uvmpdpol_balancequeue(int swap_shortage)
static void
uvmpdpol_pagedeactivate_locked(struct vm_page *pg)
{
+ struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
- KASSERT(uvm_page_owner_locked_p(pg));
+ KASSERT(mutex_owned(&s->lock));
+ KASSERT(mutex_owned(&pg->interlock));
+ KASSERT((pg->pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) !=
+ (PQ_INTENT_D | PQ_INTENT_SET));
if (pg->pqflags & PQ_ACTIVE) {
TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pdqueue);
- pg->pqflags &= ~(PQ_ACTIVE | PQ_TIME);
KASSERT(pdpol_state.s_active > 0);
pdpol_state.s_active--;
}
if ((pg->pqflags & PQ_INACTIVE) == 0) {
KASSERT(pg->wire_count == 0);
- pmap_clear_reference(pg);
TAILQ_INSERT_TAIL(&pdpol_state.s_inactiveq, pg, pdqueue);
- pg->pqflags |= PQ_INACTIVE;
pdpol_state.s_inactive++;
}
+ pg->pqflags = (pg->pqflags & PQ_INTENT_QUEUED) | PQ_INACTIVE;
}
void
uvmpdpol_pagedeactivate(struct vm_page *pg)
{
- struct uvmpdpol_globalstate *s = &pdpol_state;
- mutex_enter(&s->lock);
- uvmpdpol_pagedeactivate_locked(pg);
- mutex_exit(&s->lock);
+ KASSERT(uvm_page_owner_locked_p(pg));
+ KASSERT(mutex_owned(&pg->interlock));
+
+ /*
+ * we have to clear the reference bit now, as when it comes time to
+ * realize the intent we won't have the object locked any more.
+ */
+ pmap_clear_reference(pg);
+ uvmpdpol_set_intent(pg, PQ_INTENT_I);
}
static void
uvmpdpol_pageactivate_locked(struct vm_page *pg)
{
+ struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
+
+ KASSERT(mutex_owned(&s->lock));
+ KASSERT(mutex_owned(&pg->interlock));
+ KASSERT((pg->pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) !=
+ (PQ_INTENT_D | PQ_INTENT_SET));
uvmpdpol_pagedequeue_locked(pg);
TAILQ_INSERT_TAIL(&pdpol_state.s_activeq, pg, pdqueue);
- pg->pqflags = PQ_ACTIVE | (hardclock_ticks & PQ_TIME);
pdpol_state.s_active++;
+ pg->pqflags = (pg->pqflags & PQ_INTENT_QUEUED) | PQ_ACTIVE |
+ (hardclock_ticks & PQ_TIME);
}
void
uvmpdpol_pageactivate(struct vm_page *pg)
{
- struct uvmpdpol_globalstate *s = &pdpol_state;
+ uint32_t pqflags;
- /* Safety: PQ_ACTIVE clear also tells us if it is not enqueued. */
- if ((pg->pqflags & PQ_ACTIVE) == 0 ||
- ((hardclock_ticks & PQ_TIME) - (pg->pqflags & PQ_TIME)) >= hz) {
- mutex_enter(&s->lock);
- uvmpdpol_pageactivate_locked(pg);
- mutex_exit(&s->lock);
+ KASSERT(uvm_page_owner_locked_p(pg));
+ KASSERT(mutex_owned(&pg->interlock));
+
+ /*
+ * if there is any intent set on the page, or the page is not
+ * active, or the page was activated in the "distant" past, then
+ * it needs to be activated anew.
+ */
+ pqflags = pg->pqflags;
+ if ((pqflags & PQ_INTENT_SET) != 0 ||
+ (pqflags & PQ_ACTIVE) == 0 ||
+ ((hardclock_ticks & PQ_TIME) - (pqflags & PQ_TIME)) > hz) {
+ uvmpdpol_set_intent(pg, PQ_INTENT_A);
}
}
static void
uvmpdpol_pagedequeue_locked(struct vm_page *pg)
{
+ struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
+
+ KASSERT(mutex_owned(&s->lock));
+ KASSERT(mutex_owned(&pg->interlock));
if (pg->pqflags & PQ_ACTIVE) {
TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pdqueue);
- pg->pqflags &= ~(PQ_ACTIVE | PQ_TIME);
+ KASSERT((pg->pqflags & PQ_INACTIVE) == 0);
KASSERT(pdpol_state.s_active > 0);
pdpol_state.s_active--;
} else if (pg->pqflags & PQ_INACTIVE) {
TAILQ_REMOVE(&pdpol_state.s_inactiveq, pg, pdqueue);
- pg->pqflags &= ~PQ_INACTIVE;
KASSERT(pdpol_state.s_inactive > 0);
pdpol_state.s_inactive--;
}
+ pg->pqflags &= PQ_INTENT_QUEUED;
}
void
uvmpdpol_pagedequeue(struct vm_page *pg)
{
- struct uvmpdpol_globalstate *s = &pdpol_state;
- mutex_enter(&s->lock);
- uvmpdpol_pagedequeue_locked(pg);
- mutex_exit(&s->lock);
+ KASSERT(uvm_page_owner_locked_p(pg));
+ KASSERT(mutex_owned(&pg->interlock));
+
+ uvmpdpol_set_intent(pg, PQ_INTENT_D);
}
void
uvmpdpol_pageenqueue(struct vm_page *pg)
{
- struct uvmpdpol_globalstate *s = &pdpol_state;
- mutex_enter(&s->lock);
- uvmpdpol_pageactivate_locked(pg);
- mutex_exit(&s->lock);
+ KASSERT(uvm_page_owner_locked_p(pg));
+ KASSERT(mutex_owned(&pg->interlock));
+
+ uvmpdpol_set_intent(pg, PQ_INTENT_E);
}
void
@@ -494,9 +560,19 @@ uvmpdpol_anfree(struct vm_anon *an)
bool
uvmpdpol_pageisqueued_p(struct vm_page *pg)
{
+ uint32_t pqflags;
- /* Safe to test unlocked due to page life-cycle. */
- return (pg->pqflags & (PQ_ACTIVE | PQ_INACTIVE)) != 0;
+ /*
+ * if there's an intent set, we have to consider it. otherwise,
+ * return the actual state. we may be called unlocked for the
+ * purpose of assertions, which is safe due to the page lifecycle.
+ */
+ pqflags = atomic_load_relaxed(&pg->pqflags);
+ if ((pqflags & PQ_INTENT_SET) != 0) {
+ return (pqflags & PQ_INTENT_MASK) != PQ_INTENT_D;
+ } else {
+ return (pqflags & (PQ_ACTIVE | PQ_INACTIVE)) != 0;
+ }
}
void
@@ -555,6 +631,16 @@ uvmpdpol_init(void)
}
void
+uvmpdpol_init_cpu(struct uvm_cpu *ucpu)
+{
+ /* allocate the per-CPU pending queue; pdqhead == pdqtail means empty, and entries are pushed by decrementing pdqhead */
+ ucpu->pdq =
+ kmem_alloc(CLOCK_PDQ_SIZE * sizeof(struct vm_page *), KM_SLEEP);
+ ucpu->pdqhead = CLOCK_PDQ_SIZE;
+ ucpu->pdqtail = CLOCK_PDQ_SIZE;
+}
+
+void
uvmpdpol_reinit(void)
{
}
@@ -563,7 +649,9 @@ bool
uvmpdpol_needsscan_p(void)
{
- /* This must be an unlocked check: can be called from interrupt. */
+ /*
+ * this must be an unlocked check: can be called from interrupt.
+ */
return pdpol_state.s_inactive < pdpol_state.s_inactarg;
}
@@ -577,6 +665,157 @@ uvmpdpol_tune(void)
mutex_exit(&s->lock);
}
+/*
+ * uvmpdpol_pagerealize_locked: take the intended state set on an individual
+ * page and make it real. return true if any work was done.
+ */
+static bool
+uvmpdpol_pagerealize_locked(struct vm_page *pg)
+{
+ struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
+
+ KASSERT(mutex_owned(&s->lock));
+ KASSERT(mutex_owned(&pg->interlock));
+ /* an intent only matches when PQ_INTENT_SET is present; A and E both activate the page in this policy */
+ switch (pg->pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) {
+ case PQ_INTENT_A | PQ_INTENT_SET:
+ case PQ_INTENT_E | PQ_INTENT_SET:
+ uvmpdpol_pageactivate_locked(pg);
+ return true;
+ case PQ_INTENT_I | PQ_INTENT_SET:
+ uvmpdpol_pagedeactivate_locked(pg);
+ return true;
+ case PQ_INTENT_D | PQ_INTENT_SET:
+ uvmpdpol_pagedequeue_locked(pg);
+ return true;
+ default:
+ return false;
+ }
+}
+
+/*
+ * uvmpdpol_flush: return the current uvm_cpu with all of its pending
+ * updates flushed to the global queues. this routine may block, and
+ * so can switch cpu. the idea is to empty the queue on whatever cpu
+ * we finally end up on.
+ */
+static struct uvm_cpu *
+uvmpdpol_flush(void)
+{
+ struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
+ struct uvm_cpu *ucpu;
+ struct vm_page *pg;
+
+ KASSERT(kpreempt_disabled());
+ /* global lock first, then each page's interlock in turn; ucpu is re-read from curcpu() on every pass */
+ mutex_enter(&s->lock);
+ for (;;) {
+ /*
+ * prefer scanning forwards (even though mutex_enter() is
+ * serializing) so as to not defeat any prefetch logic in
+ * the CPU. that means elsewhere enqueuing backwards, like
+ * a stack, but not so important there as pages are being
+ * added singularly.
+ *
+ * prefetch the next "struct vm_page" while working on the
+ * current one. this has a measurable and very positive
+ * effect in reducing the amount of time spent here under
+ * the global lock.
+ */
+ ucpu = curcpu()->ci_data.cpu_uvm;
+ KASSERT(ucpu->pdqhead <= ucpu->pdqtail);
+ if (__predict_false(ucpu->pdqhead == ucpu->pdqtail)) {
+ break;
+ }
+ pg = ucpu->pdq[ucpu->pdqhead++];
+ if (__predict_true(ucpu->pdqhead != ucpu->pdqtail)) {
+ __builtin_prefetch(ucpu->pdq[ucpu->pdqhead]);
+ }
+ mutex_enter(&pg->interlock);
+ pg->pqflags &= ~PQ_INTENT_QUEUED;
+ (void)uvmpdpol_pagerealize_locked(pg);
+ mutex_exit(&pg->interlock);
+ }
+ mutex_exit(&s->lock);
+ return ucpu;
+}
+
+/*
+ * uvmpdpol_pagerealize: realize any intent set on the page. in this
+ * implementation, that means putting the page on a per-CPU queue to be
+ * dealt with later.
+ */
+void
+uvmpdpol_pagerealize(struct vm_page *pg)
+{
+ struct uvm_cpu *ucpu;
+
+ /*
+ * drain the per-CPU queue if full (pdqhead == 0), then push the page.
+ */
+ kpreempt_disable();
+ ucpu = curcpu()->ci_data.cpu_uvm;
+ if (__predict_false(ucpu->pdqhead == 0)) {
+ ucpu = uvmpdpol_flush();
+ }
+ ucpu->pdq[--(ucpu->pdqhead)] = pg;
+ kpreempt_enable();
+}
+
+/*
+ * uvmpdpol_idle: called from the system idle loop. periodically purge any
+ * pending updates back to the global queues.
+ */
+void
+uvmpdpol_idle(struct uvm_cpu *ucpu)
+{
+ struct uvmpdpol_globalstate *s = &pdpol_state;
+ struct vm_page *pg;
+
+ KASSERT(kpreempt_disabled());
+
+ /*
+ * if no pages in the queue, we have nothing to do.
+ */
+ if (ucpu->pdqhead == ucpu->pdqtail) {
+ ucpu->pdqtime = hardclock_ticks;
+ return;
+ }
+
+ /*
+ * don't do this more than ~8 times a second as it would needlessly
+ * exert pressure.
+ */
+ if (hardclock_ticks - ucpu->pdqtime < (hz >> 3)) {
+ return;
+ }
+
+ /*
+ * the idle LWP can't block, so we have to try for the lock. if we
+ * get it, purge the per-CPU pending update queue, stopping early if
+ * a page interlock is busy or a resched is pending.
+ */
+ if (mutex_tryenter(&s->lock)) {
+ while (ucpu->pdqhead != ucpu->pdqtail) {
+ pg = ucpu->pdq[ucpu->pdqhead];
+ if (!mutex_tryenter(&pg->interlock)) {
+ break;
+ }
+ ucpu->pdqhead++;
+ pg->pqflags &= ~PQ_INTENT_QUEUED;
+ (void)uvmpdpol_pagerealize_locked(pg);
+ mutex_exit(&pg->interlock);
+ if (curcpu()->ci_want_resched) {
+ break;
+ }
+ }
+ if (ucpu->pdqhead == ucpu->pdqtail) {
+ ucpu->pdqtime = hardclock_ticks;
+ }
+ mutex_exit(&s->lock);
+ }
+}
+
#if !defined(PDSIM)
#include <sys/sysctl.h> /* XXX SYSCTL_DESCR */
Index: src/sys/uvm/uvm_pdpolicy_clockpro.c
diff -u src/sys/uvm/uvm_pdpolicy_clockpro.c:1.21 src/sys/uvm/uvm_pdpolicy_clockpro.c:1.22
--- src/sys/uvm/uvm_pdpolicy_clockpro.c:1.21 Tue Dec 31 12:40:27 2019
+++ src/sys/uvm/uvm_pdpolicy_clockpro.c Tue Dec 31 22:42:51 2019
@@ -1,4 +1,4 @@
-/* $NetBSD: uvm_pdpolicy_clockpro.c,v 1.21 2019/12/31 12:40:27 ad Exp $ */
+/* $NetBSD: uvm_pdpolicy_clockpro.c,v 1.22 2019/12/31 22:42:51 ad Exp $ */
/*-
* Copyright (c)2005, 2006 YAMAMOTO Takashi,
@@ -43,7 +43,7 @@
#else /* defined(PDSIM) */
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clockpro.c,v 1.21 2019/12/31 12:40:27 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clockpro.c,v 1.22 2019/12/31 22:42:51 ad Exp $");
#include "opt_ddb.h"
@@ -121,13 +121,13 @@ PDPOL_EVCNT_DEFINE(speculativemiss)
PDPOL_EVCNT_DEFINE(locksuccess)
PDPOL_EVCNT_DEFINE(lockfail)
-#define PQ_REFERENCED 0x000000001
-#define PQ_HOT 0x000000002
-#define PQ_TEST 0x000000004
-#define PQ_INITIALREF 0x000000008
-#define PQ_QMASK 0x000000070
-#define PQ_QFACTOR 0x000000010
-#define PQ_SPECULATIVE 0x000000080
+#define PQ_REFERENCED 0x000000010
+#define PQ_HOT 0x000000020
+#define PQ_TEST 0x000000040
+#define PQ_INITIALREF 0x000000080
+#define PQ_QMASK 0x000000700
+#define PQ_QFACTOR 0x000000100
+#define PQ_SPECULATIVE 0x000000800
#define CLOCKPRO_NOQUEUE 0
#define CLOCKPRO_NEWQ 1 /* small queue to clear initial ref. */
@@ -141,6 +141,8 @@ PDPOL_EVCNT_DEFINE(lockfail)
#define CLOCKPRO_LISTQ 4
#define CLOCKPRO_NQUEUE 4
+static bool uvmpdpol_pagerealize_locked(struct vm_page *);
+
static inline void
clockpro_setq(struct vm_page *pg, int qidx)
{
@@ -1129,12 +1131,10 @@ done:;
return pg;
}
-void
-uvmpdpol_pageactivate(struct vm_page *pg)
+static void
+uvmpdpol_pageactivate_locked(struct vm_page *pg)
{
- struct clockpro_state * const s = &clockpro;
- mutex_enter(&s->lock);
if (!uvmpdpol_pageisqueued_p(pg)) {
KASSERT((pg->pqflags & PQ_SPECULATIVE) == 0);
pg->pqflags |= PQ_INITIALREF;
@@ -1147,54 +1147,111 @@ uvmpdpol_pageactivate(struct vm_page *pg
clockpro_pageenqueue(pg);
}
pg->pqflags |= PQ_REFERENCED;
- mutex_exit(&s->lock);
}
void
-uvmpdpol_pagedeactivate(struct vm_page *pg)
+uvmpdpol_pageactivate(struct vm_page *pg)
+{
+
+ uvmpdpol_set_intent(pg, PQ_INTENT_A);
+}
+
+static void
+uvmpdpol_pagedeactivate_locked(struct vm_page *pg)
{
- struct clockpro_state * const s = &clockpro;
- mutex_enter(&s->lock);
clockpro_clearreferencebit(pg, true);
- mutex_exit(&s->lock);
}
void
-uvmpdpol_pagedequeue(struct vm_page *pg)
+uvmpdpol_pagedeactivate(struct vm_page *pg)
+{
+
+ uvmpdpol_set_intent(pg, PQ_INTENT_I);
+}
+
+static void
+uvmpdpol_pagedequeue_locked(struct vm_page *pg)
{
- struct clockpro_state * const s = &clockpro;
if (!uvmpdpol_pageisqueued_p(pg)) {
return;
}
- mutex_enter(&s->lock);
clockpro_pagedequeue(pg);
pg->pqflags &= ~(PQ_INITIALREF|PQ_SPECULATIVE);
- mutex_exit(&s->lock);
}
void
-uvmpdpol_pageenqueue(struct vm_page *pg)
+uvmpdpol_pagedequeue(struct vm_page *pg)
{
-#if 1
- struct clockpro_state * const s = &clockpro;
+ uvmpdpol_set_intent(pg, PQ_INTENT_D);
+}
+
+static void
+uvmpdpol_pageenqueue_locked(struct vm_page *pg)
+{
+#if 1
if (uvmpdpol_pageisqueued_p(pg)) {
return;
}
- mutex_enter(&s->lock);
clockpro_clearreferencebit(pg, true);
pg->pqflags |= PQ_SPECULATIVE;
clockpro_pageenqueue(pg);
- mutex_exit(&s->lock);
#else
- uvmpdpol_pageactivate(pg);
+ uvmpdpol_pageactivate_locked(pg);
#endif
}
void
+uvmpdpol_pageenqueue(struct vm_page *pg)
+{
+ /* request an enqueue: uvmpdpol_pagerealize_locked() maps PQ_INTENT_E to uvmpdpol_pageenqueue_locked() */
+ uvmpdpol_set_intent(pg, PQ_INTENT_E);
+}
+
+static bool
+uvmpdpol_pagerealize_locked(struct vm_page *pg)
+{
+ uint32_t pqflags;
+
+ KASSERT(mutex_owned(&clockpro.lock));
+ KASSERT(mutex_owned(&pg->interlock));
+ /* snapshot the intent, clear it on the page, then dispatch on the snapshot. returns true if work was done. */
+ /* XXX this needs to be called from elsewhere, like uvmpdpol_clock. */
+
+ pqflags = pg->pqflags;
+ pg->pqflags &= ~(PQ_INTENT_SET | PQ_INTENT_QUEUED);
+ switch (pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) {
+ case PQ_INTENT_A | PQ_INTENT_SET:
+ uvmpdpol_pageactivate_locked(pg);
+ return true;
+ case PQ_INTENT_E | PQ_INTENT_SET:
+ uvmpdpol_pageenqueue_locked(pg);
+ return true;
+ case PQ_INTENT_I | PQ_INTENT_SET:
+ uvmpdpol_pagedeactivate_locked(pg);
+ return true;
+ case PQ_INTENT_D | PQ_INTENT_SET:
+ uvmpdpol_pagedequeue_locked(pg);
+ return true;
+ default:
+ return false;
+ }
+}
+
+void
+uvmpdpol_pagerealize(struct vm_page *pg)
+{
+ struct clockpro_state * const s = &clockpro;
+
+ /*
+ * uvm_pageunlock() drops pg->interlock before calling here, but
+ * uvmpdpol_pagerealize_locked() asserts that it is held and edits
+ * pg->pqflags. retake it, global lock first as the clock policy
+ * does, and realize the intent immediately.
+ */
+ mutex_enter(&s->lock);
+ mutex_enter(&pg->interlock);
+ (void)uvmpdpol_pagerealize_locked(pg);
+ mutex_exit(&pg->interlock);
+ mutex_exit(&s->lock);
+}
+
+void
uvmpdpol_anfree(struct vm_anon *an)
{
struct clockpro_state * const s = &clockpro;
@@ -1398,6 +1455,12 @@ uvmpdpol_tune(void)
mutex_exit(&s->lock);
}
+void
+uvmpdpol_idle(struct uvm_cpu *ucpu)
+{
+ /* nothing to purge: this policy realizes intents directly in uvmpdpol_pagerealize() instead of queueing them per-CPU */
+}
+
#if !defined(PDSIM)
#include <sys/sysctl.h> /* XXX SYSCTL_DESCR */