Here's a test patch, inclusive of some debugging sysctls:
vm.always_launder
        Set to 1 to give up on trying to avoid pageouts and
        launder unconditionally on the first pass.

vm.vm_pageout_stats_rescans
        Number of times the main inactive scan in the pageout
        loop had to restart.

vm.vm_pageout_stats_xtralaunder
        Number of times a second pass had to be taken (in
        normal mode, with always_launder set to 0).
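
If you want to watch these counters while beating on the patch, you
can just run 'sysctl vm.vm_pageout_stats_rescans' from the shell, or
poll them from a monitoring program via sysctlbyname(3).  A minimal
sketch, not part of the patch:

/*
 * Minimal sketch (not part of the patch): poll the debugging
 * counters from userland via sysctlbyname(3).
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
        int rescans, xtra;
        size_t len;

        len = sizeof(rescans);
        if (sysctlbyname("vm.vm_pageout_stats_rescans", &rescans,
            &len, NULL, 0) < 0) {
                perror("vm.vm_pageout_stats_rescans");
                return (1);
        }
        len = sizeof(xtra);
        if (sysctlbyname("vm.vm_pageout_stats_xtralaunder", &xtra,
            &len, NULL, 0) < 0) {
                perror("vm.vm_pageout_stats_xtralaunder");
                return (1);
        }
        printf("rescans=%d xtralaunder=%d\n", rescans, xtra);
        return (0);
}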
This patch:
* implements a placemarker to try to avoid restarts of the main
  inactive scan (a generic sketch of the technique follows this
  list).
* does not penalize the pageout daemon for being able to cluster
  writes.
* adds an additional vnode check that should be there.
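
For anyone who hasn't seen the placemarker trick before: the idea is
to insert a dummy node after the element you are about to operate on,
so that even if neighboring elements get removed or requeued while
you block, you can recover your scan position from the marker.  Here
is a generic userland sketch using the same <sys/queue.h> TAILQ
macros; it is illustrative only, not part of the patch, and 'struct
item', 'is_marker', and the processing step are made up:

#include <sys/queue.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct item {
        TAILQ_ENTRY(item) link;
        int value;
        int is_marker;          /* analogous to PG_MARKER */
};

TAILQ_HEAD(itemq, item);

int
main(void)
{
        struct itemq q = TAILQ_HEAD_INITIALIZER(q);
        struct item marker, *it, *next;
        int i;

        for (i = 0; i < 5; ++i) {
                it = calloc(1, sizeof(*it));
                it->value = i;
                TAILQ_INSERT_TAIL(&q, it, link);
        }
        memset(&marker, 0, sizeof(marker));
        marker.is_marker = 1;

        for (it = TAILQ_FIRST(&q); it != NULL; it = next) {
                next = TAILQ_NEXT(it, link);
                if (it->is_marker)      /* skip other scans' markers */
                        continue;
                /*
                 * Insert the marker after the current element, then
                 * do work that may block and remove or requeue
                 * neighbors (the kernel analogue is the clustering
                 * done by vm_pageout_clean()).
                 */
                TAILQ_INSERT_AFTER(&q, it, &marker, link);
                printf("processing %d\n", it->value);
                /*
                 * Recover our scan position from the marker, then
                 * pull the marker back out of the queue.
                 */
                next = TAILQ_NEXT(&marker, link);
                TAILQ_REMOVE(&q, &marker, link);
        }
        return (0);
}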
One last note: I wrote a quick and dirty program to mmap() a bunch
of big files with MAP_NOSYNC and then dirty them in a loop.  I noticed
that the filesystem update daemon 'froze up' the system for about a
second every 30 seconds due to the huge number of dirty MAP_NOSYNC
pages (about 1GB worth) sitting around (it has to scan the vm_page_t's
even if it doesn't do anything with them). This is a separate issue.
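
The program itself isn't attached; something like the following
reproduces the effect.  The file name, the 256MB size, and the
hardwired 4K page size are placeholders, and MAP_NOSYNC itself is
FreeBSD-specific:

/*
 * Hypothetical reconstruction of the test described above: map a
 * big file MAP_NOSYNC and keep dirtying its pages so the update
 * daemon has a large set of dirty vm_page_t's to scan past.
 */
#include <sys/types.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define FILESIZE        (256 * 1024 * 1024)     /* assumed: 256MB */
#define PAGESIZE        4096                    /* assumed: 4K pages */

int
main(int argc, char **argv)
{
        const char *path = (argc > 1) ? argv[1] : "bigfile.dat";
        char *p;
        size_t off;
        int fd;

        if ((fd = open(path, O_RDWR | O_CREAT, 0644)) < 0) {
                perror("open");
                exit(1);
        }
        if (ftruncate(fd, FILESIZE) < 0) {
                perror("ftruncate");
                exit(1);
        }
        p = mmap(NULL, FILESIZE, PROT_READ | PROT_WRITE,
            MAP_SHARED | MAP_NOSYNC, fd, 0);
        if (p == MAP_FAILED) {
                perror("mmap");
                exit(1);
        }
        /*
         * Dirty every page over and over.  The pages are MAP_NOSYNC
         * so the update daemon skips writing them, but it still has
         * to walk the dirty vm_page_t's.
         */
        for (;;) {
                for (off = 0; off < FILESIZE; off += PAGESIZE)
                        p[off]++;
        }
        /* NOTREACHED */
}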
If Alfred and others running heavily loaded systems are able to test
this patch sufficiently, we can include it (minus the debugging
sysctls) in the release.  If not, I will wait until after the release
is rolled before committing it (or whatever the final patch winds up
looking like).
-Matt
Index: vm_page.c
===================================================================
RCS file: /home/ncvs/src/sys/vm/vm_page.c,v
retrieving revision 1.147.2.3
diff -u -r1.147.2.3 vm_page.c
--- vm_page.c 2000/08/04 22:31:11 1.147.2.3
+++ vm_page.c 2000/10/26 04:43:22
@@ -1783,6 +1783,12 @@
("contigmalloc1: page %p is not PQ_INACTIVE",
m));
next = TAILQ_NEXT(m, pageq);
+ /*
+ * ignore markers
+ */
+ if (m->flags & PG_MARKER)
+ continue;
+
if (vm_page_sleep_busy(m, TRUE, "vpctw0"))
goto again1;
vm_page_test_dirty(m);
Index: vm_page.h
===================================================================
RCS file: /home/ncvs/src/sys/vm/vm_page.h,v
retrieving revision 1.75.2.3
diff -u -r1.75.2.3 vm_page.h
--- vm_page.h 2000/09/16 01:08:03 1.75.2.3
+++ vm_page.h 2000/10/26 04:17:28
@@ -251,6 +251,7 @@
#define PG_SWAPINPROG 0x0200 /* swap I/O in progress on page */
#define PG_NOSYNC 0x0400 /* do not collect for syncer */
#define PG_UNMANAGED 0x0800 /* No PV management for page */
+#define PG_MARKER 0x1000 /* special queue marker page */
/*
* Misc constants.
Index: vm_pageout.c
===================================================================
RCS file: /home/ncvs/src/sys/vm/vm_pageout.c,v
retrieving revision 1.151.2.4
diff -u -r1.151.2.4 vm_pageout.c
--- vm_pageout.c 2000/08/04 22:31:11 1.151.2.4
+++ vm_pageout.c 2000/10/26 05:07:45
@@ -143,6 +143,9 @@
static int disable_swap_pageouts=0;
static int max_page_launder=100;
+static int always_launder=0;
+static int vm_pageout_stats_rescans=0;
+static int vm_pageout_stats_xtralaunder=0;
#if defined(NO_SWAPPING)
static int vm_swap_enabled=0;
static int vm_swap_idle_enabled=0;
@@ -186,6 +189,12 @@
SYSCTL_INT(_vm, OID_AUTO, max_page_launder,
CTLFLAG_RW, &max_page_launder, 0, "Maximum number of pages to clean per pass");
+SYSCTL_INT(_vm, OID_AUTO, always_launder,
+ CTLFLAG_RW, &always_launder, 0, "Always launder on the first pass");
+SYSCTL_INT(_vm, OID_AUTO, vm_pageout_stats_rescans,
+ CTLFLAG_RD, &vm_pageout_stats_rescans, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, vm_pageout_stats_xtralaunder,
+ CTLFLAG_RD, &vm_pageout_stats_xtralaunder, 0, "");
#define VM_PAGEOUT_PAGE_COUNT 16
@@ -613,11 +622,16 @@
/*
* vm_pageout_scan does the dirty work for the pageout daemon.
+ *
+ * This code is responsible for calculating the page shortage
+ * and then attempting to clean or free enough pages to hit that
+ * mark.
*/
static int
vm_pageout_scan()
{
vm_page_t m, next;
+ struct vm_page marker;
int page_shortage, maxscan, pcount;
int addl_page_shortage, addl_page_shortage_init;
int maxlaunder;
@@ -651,27 +665,41 @@
/*
* Figure out what to do with dirty pages when they are encountered.
* Assume that 1/3 of the pages on the inactive list are clean. If
- * we think we can reach our target, disable laundering (do not
- * clean any dirty pages). If we miss the target we will loop back
- * up and do a laundering run.
+ * we think we can reach our target, reduce the amount of launder we
+ * try to do in the first pass significantly. If we miss the target
+ * we will loop back up and do a full laundering run.
+ *
+ * If always_launder is set, we do a full laundering run on the
+ * first pass.
*/
- if (cnt.v_inactive_count / 3 > page_shortage) {
+ if (always_launder == 0 && cnt.v_inactive_count / 3 > page_shortage) {
+#if 0 /* THIS MAY BE BETTER */
+ maxlaunder = cnt.v_inactive_target / 10 + 1;
+#endif
maxlaunder = 0;
launder_loop = 0;
} else {
- maxlaunder =
- (cnt.v_inactive_target > max_page_launder) ?
- max_page_launder : cnt.v_inactive_target;
+ maxlaunder = cnt.v_inactive_target;
launder_loop = 1;
}
+ if (maxlaunder > max_page_launder)
+ maxlaunder = max_page_launder;
/*
+ * Initialize our marker
+ */
+ bzero(&marker, sizeof(marker));
+ marker.flags = PG_BUSY | PG_FICTITIOUS | PG_MARKER;
+ marker.valid = 0;
+ marker.queue = PQ_INACTIVE;
+ marker.wire_count = 1;
+
+ /*
* Start scanning the inactive queue for pages we can move to the
* cache or free. The scan will stop when the target is reached or
* we have scanned the entire inactive queue.
*/
-
rescan0:
addl_page_shortage = addl_page_shortage_init;
maxscan = cnt.v_inactive_count;
@@ -682,11 +710,18 @@
cnt.v_pdpages++;
if (m->queue != PQ_INACTIVE) {
+ ++vm_pageout_stats_rescans;
goto rescan0;
}
next = TAILQ_NEXT(m, pageq);
+ /*
+ * Skip marker pages
+ */
+ if (m->flags & PG_MARKER)
+ continue;
+
if (m->hold_count) {
s = splvm();
TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
@@ -763,7 +798,8 @@
--page_shortage;
/*
- * Clean pages can be placed onto the cache queue.
+ * Clean pages can be placed onto the cache queue, which
+ * is almost the same as freeing them.
*/
} else if (m->dirty == 0) {
vm_page_cache(m);
@@ -774,7 +810,6 @@
* only a limited number of pages per pagedaemon pass.
*/
} else if (maxlaunder > 0) {
- int written;
int swap_pageouts_ok;
struct vnode *vp = NULL;
@@ -871,10 +906,16 @@
}
/*
- * The page might have been moved to another queue
- * during potential blocking in vget() above.
+ * The page might have been moved to another
+ * queue during potential blocking in vget()
+ * above. The page might have been freed and
+ * reused for another vnode. The object might
+ * have been reused for another vnode.
*/
- if (m->queue != PQ_INACTIVE) {
+ if (m->queue != PQ_INACTIVE ||
+ m->object != object ||
+ object->handle != vp
+ ) {
if (object->flags & OBJ_MIGHTBEDIRTY)
vnodes_skipped++;
vput(vp);
@@ -882,9 +923,10 @@
}
/*
- * The page may have been busied during the blocking in
- * vput(); We don't move the page back onto the end of
- * the queue so that statistics are more correct if we don't.
+ * The page may have been busied during the
+ * blocking in vput(); We don't move the
+ * page back onto the end of the queue so that
+ * statistics are more correct if we don't.
*/
if (m->busy || (m->flags & PG_BUSY)) {
vput(vp);
@@ -910,13 +952,27 @@
* If a page is dirty, then it is either being washed
* (but not yet cleaned) or it is still in the
* laundry. If it is still in the laundry, then we
- * start the cleaning operation.
+ * start the cleaning operation. maxlaunder nominally
+ * counts I/O cost, essentially seeks, so we drop it
+ * by one no matter how large a cluster
+ * vm_pageout_clean() is able to put together.
+ *
+ * This operation may cluster-out, causing the 'next'
+ * page to move to another queue. To avoid loosing our
+ * place we insert a placemarker, then recalculate
+ * next after vm_pageout_clean() returns.
*/
- written = vm_pageout_clean(m);
- if (vp)
+ s = splvm();
+ TAILQ_INSERT_AFTER(&vm_page_queues[PQ_INACTIVE].pl, m,
+     &marker, pageq);
+ splx(s);
+ if (vm_pageout_clean(m) != 0)
+ --maxlaunder;
+ s = splvm();
+ next = TAILQ_NEXT(&marker, pageq);
+ TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, &marker, pageq);
+ splx(s);
+ if (vp != NULL)
vput(vp);
-
- maxlaunder -= written;
}
}
@@ -930,6 +986,7 @@
maxlaunder =
(cnt.v_inactive_target > max_page_launder) ?
max_page_launder : cnt.v_inactive_target;
+ ++vm_pageout_stats_xtralaunder;
goto rescan0;
}