Here's a test patch, including some debugging sysctls:

        vm.always_launder       Set to 1 to do a full laundering run on
                                the first pass, i.e. give up on trying
                                to avoid pageouts.

        vm.vm_pageout_stats_rescans
                                Number of times the main inactive scan
                                in the pageout loop had to restart.

        vm.vm_pageout_stats_xtralaunder
                                Number of times a second laundering pass
                                had to be taken (in normal mode, with
                                always_launder set to 0).
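
    For anyone who wants to watch the counters while beating on the
    patch: vm.always_launder can be flipped with 'sysctl -w
    vm.always_launder=1', and the two read-only stats can be polled
    with sysctl(8) or from a little C program via sysctlbyname(3).
    A minimal sketch (the 10 second poll interval is arbitrary):

#include <sys/types.h>
#include <sys/sysctl.h>
#include <err.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
        for (;;) {
                int rescans, xtra;
                size_t len;

                len = sizeof(rescans);
                if (sysctlbyname("vm.vm_pageout_stats_rescans",
                    &rescans, &len, NULL, 0) == -1)
                        err(1, "vm.vm_pageout_stats_rescans");
                len = sizeof(xtra);
                if (sysctlbyname("vm.vm_pageout_stats_xtralaunder",
                    &xtra, &len, NULL, 0) == -1)
                        err(1, "vm.vm_pageout_stats_xtralaunder");
                printf("rescans %d  xtralaunder %d\n", rescans, xtra);
                sleep(10);
        }
        /* NOTREACHED */
}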

    This patch:

        * implements a placemarker to try to avoid restarts
          (see the sketch after this list).

        * does not penalize the pageout daemon for being able
          to cluster writes (maxlaunder now counts I/O operations
          rather than pages written).

        * adds an additional vnode check that should have been
          there: after blocking in vget() the page or object may
          have been recycled.
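
    The placemarker trick itself is generic: vm_pageout_clean() can block
    and reorder the inactive queue out from under the scan, so a dummy
    vm_page flagged PG_MARKER is parked after the page being laundered
    and 'next' is recomputed from the marker afterwards.  Stripped of the
    VM specifics (the names below are made up for illustration; the real
    change is in the vm_pageout.c hunk further down), the idea looks like
    this:

#include <sys/queue.h>

struct elem {
        int                     is_marker;      /* stands in for PG_MARKER */
        TAILQ_ENTRY(elem)       link;
};
TAILQ_HEAD(elemq, elem);

/* stand-in for vm_pageout_clean(); may sleep and shuffle the queue */
static void
process_may_block(struct elem *e)
{
        (void)e;
}

static void
scan(struct elemq *q)
{
        struct elem marker, *e, *next;

        marker.is_marker = 1;
        for (e = TAILQ_FIRST(q); e != NULL; e = next) {
                next = TAILQ_NEXT(e, link);
                if (e->is_marker)       /* skip any marker in the queue */
                        continue;
                /*
                 * Park the marker after the current element so our place
                 * survives whatever happens while we block, then recompute
                 * 'next' from the marker and pull it back out.
                 */
                TAILQ_INSERT_AFTER(q, e, &marker, link);
                process_may_block(e);
                next = TAILQ_NEXT(&marker, link);
                TAILQ_REMOVE(q, &marker, link);
        }
}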

    One last note:  I wrote a quick and dirty program to mmap() a bunch
    of big files MAP_NOSYNC and then dirty them in a loop.  I noticed
    that the filesystem update daemon 'froze up' the system for about a 
    second every 30 seconds due to the huge number of dirty MAP_NOSYNC
    pages (about 1GB worth) sitting around (it has to scan the vm_page_t's
    even if it doesn't do anything with them).  This is a separate issue.
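
    Something along these lines will reproduce it -- not the exact
    program I used, it just mmap()s whatever files you name on the
    command line MAP_NOSYNC and keeps dirtying one byte per page:

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <err.h>
#include <fcntl.h>
#include <unistd.h>

#define MAXMAPS 64

int
main(int argc, char **argv)
{
        char *base[MAXMAPS];
        off_t size[MAXMAPS];
        int i, nmaps = 0;

        /* map each file named on the command line MAP_NOSYNC */
        for (i = 1; i < argc && nmaps < MAXMAPS; ++i) {
                struct stat st;
                int fd;

                if ((fd = open(argv[i], O_RDWR)) < 0 || fstat(fd, &st) < 0)
                        err(1, "%s", argv[i]);
                base[nmaps] = mmap(NULL, st.st_size, PROT_READ | PROT_WRITE,
                    MAP_SHARED | MAP_NOSYNC, fd, 0);
                if (base[nmaps] == MAP_FAILED)
                        err(1, "mmap %s", argv[i]);
                size[nmaps++] = st.st_size;
        }

        /* keep touching one byte in every page of every mapping */
        for (;;) {
                for (i = 0; i < nmaps; ++i) {
                        off_t off;

                        for (off = 0; off < size[i]; off += getpagesize())
                                base[i][off]++;
                }
        }
        /* NOTREACHED */
}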

    If Alfred and others running heavily loaded systems are able to test
    this patch sufficiently, we can include it (minus the debugging
    sysctls) in the release.  If not, I will wait until after the release
    is rolled before committing it (or whatever the final patch winds up
    looking like).

                                        -Matt

Index: vm_page.c
===================================================================
RCS file: /home/ncvs/src/sys/vm/vm_page.c,v
retrieving revision 1.147.2.3
diff -u -r1.147.2.3 vm_page.c
--- vm_page.c   2000/08/04 22:31:11     1.147.2.3
+++ vm_page.c   2000/10/26 04:43:22
@@ -1783,6 +1783,12 @@
                                        ("contigmalloc1: page %p is not PQ_INACTIVE", 
m));
 
                                next = TAILQ_NEXT(m, pageq);
+                               /*
+                                * ignore markers
+                                */
+                               if (m->flags & PG_MARKER)
+                                       continue;
+
                                if (vm_page_sleep_busy(m, TRUE, "vpctw0"))
                                        goto again1;
                                vm_page_test_dirty(m);
Index: vm_page.h
===================================================================
RCS file: /home/ncvs/src/sys/vm/vm_page.h,v
retrieving revision 1.75.2.3
diff -u -r1.75.2.3 vm_page.h
--- vm_page.h   2000/09/16 01:08:03     1.75.2.3
+++ vm_page.h   2000/10/26 04:17:28
@@ -251,6 +251,7 @@
 #define PG_SWAPINPROG  0x0200          /* swap I/O in progress on page      */
 #define PG_NOSYNC      0x0400          /* do not collect for syncer */
 #define PG_UNMANAGED   0x0800          /* No PV management for page */
+#define PG_MARKER      0x1000          /* special queue marker page */
 
 /*
  * Misc constants.
Index: vm_pageout.c
===================================================================
RCS file: /home/ncvs/src/sys/vm/vm_pageout.c,v
retrieving revision 1.151.2.4
diff -u -r1.151.2.4 vm_pageout.c
--- vm_pageout.c        2000/08/04 22:31:11     1.151.2.4
+++ vm_pageout.c        2000/10/26 05:07:45
@@ -143,6 +143,9 @@
 static int disable_swap_pageouts=0;
 
 static int max_page_launder=100;
+static int always_launder=0;
+static int vm_pageout_stats_rescans=0;
+static int vm_pageout_stats_xtralaunder=0;
 #if defined(NO_SWAPPING)
 static int vm_swap_enabled=0;
 static int vm_swap_idle_enabled=0;
@@ -186,6 +189,12 @@
 
 SYSCTL_INT(_vm, OID_AUTO, max_page_launder,
        CTLFLAG_RW, &max_page_launder, 0, "Maximum number of pages to clean per pass");
+SYSCTL_INT(_vm, OID_AUTO, always_launder,
+       CTLFLAG_RW, &always_launder, 0, "Always launder on the first pass");
+SYSCTL_INT(_vm, OID_AUTO, vm_pageout_stats_rescans,
+       CTLFLAG_RD, &vm_pageout_stats_rescans, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, vm_pageout_stats_xtralaunder,
+       CTLFLAG_RD, &vm_pageout_stats_xtralaunder, 0, "");
 
 
 #define VM_PAGEOUT_PAGE_COUNT 16
@@ -613,11 +622,16 @@
 
 /*
  *     vm_pageout_scan does the dirty work for the pageout daemon.
+ *
+ *     This code is responsible for calculating the page shortage
+ *     and then attempting to clean or free enough pages to hit that
+ *     mark.
  */
 static int
 vm_pageout_scan()
 {
        vm_page_t m, next;
+       struct vm_page marker;
        int page_shortage, maxscan, pcount;
        int addl_page_shortage, addl_page_shortage_init;
        int maxlaunder;
@@ -651,27 +665,41 @@
        /*
         * Figure out what to do with dirty pages when they are encountered.
         * Assume that 1/3 of the pages on the inactive list are clean.  If
-        * we think we can reach our target, disable laundering (do not
-        * clean any dirty pages).  If we miss the target we will loop back
-        * up and do a laundering run.
+        * we think we can reach our target, reduce the amount of launder we
+        * try to do in the first pass significantly.  If we miss the target
+        * we will loop back up and do a full laundering run.
+        *
+        * If always_launder is set, we do a full laundering run on the
+        * first pass.
         */
 
-       if (cnt.v_inactive_count / 3 > page_shortage) {
+       if (always_launder == 0 && cnt.v_inactive_count / 3 > page_shortage) {
+#if 0  /* THIS MAY BE BETTER */
+               maxlaunder = cnt.v_inactive_target / 10 + 1;
+#endif
                maxlaunder = 0;
                launder_loop = 0;
        } else {
-               maxlaunder = 
-                   (cnt.v_inactive_target > max_page_launder) ?
-                   max_page_launder : cnt.v_inactive_target;
+               maxlaunder = cnt.v_inactive_target;
                launder_loop = 1;
        }
+       if (maxlaunder > max_page_launder)
+           maxlaunder = max_page_launder;
 
        /*
+        * Initialize our marker
+        */
+       bzero(&marker, sizeof(marker));
+       marker.flags = PG_BUSY | PG_FICTITIOUS | PG_MARKER;
+       marker.valid = 0;
+       marker.queue = PQ_INACTIVE;
+       marker.wire_count = 1;
+
+       /*
         * Start scanning the inactive queue for pages we can move to the
         * cache or free.  The scan will stop when the target is reached or
         * we have scanned the entire inactive queue.
         */
-
 rescan0:
        addl_page_shortage = addl_page_shortage_init;
        maxscan = cnt.v_inactive_count;
@@ -682,11 +710,18 @@
                cnt.v_pdpages++;
 
                if (m->queue != PQ_INACTIVE) {
+                       ++vm_pageout_stats_rescans;
                        goto rescan0;
                }
 
                next = TAILQ_NEXT(m, pageq);
 
+               /*
+                * Skip marker pages
+                */
+               if (m->flags & PG_MARKER)
+                       continue;
+
                if (m->hold_count) {
                        s = splvm();
                        TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
@@ -763,7 +798,8 @@
                        --page_shortage;
 
                /*
-                * Clean pages can be placed onto the cache queue.
+                * Clean pages can be placed onto the cache queue, which
+                * is almost the same as freeing them.
                 */
                } else if (m->dirty == 0) {
                        vm_page_cache(m);
@@ -774,7 +810,6 @@
                 * only a limited number of pages per pagedaemon pass.
                 */
                } else if (maxlaunder > 0) {
-                       int written;
                        int swap_pageouts_ok;
                        struct vnode *vp = NULL;
 
@@ -871,10 +906,16 @@
                                }
 
                                /*
-                                * The page might have been moved to another queue
-                                * during potential blocking in vget() above.
+                                * The page might have been moved to another
+                                * queue during potential blocking in vget()
+                                * above.  The page might have been freed and
+                                * reused for another vnode.  The object might
+                                * have been reused for another vnode.
                                 */
-                               if (m->queue != PQ_INACTIVE) {
+                               if (m->queue != PQ_INACTIVE ||
+                                   m->object != object ||
+                                   object->handle != vp
+                               ) {
                                        if (object->flags & OBJ_MIGHTBEDIRTY)
                                                vnodes_skipped++;
                                        vput(vp);
@@ -882,9 +923,10 @@
                                }
        
                                /*
-                                * The page may have been busied during the blocking in
-                                * vput();  We don't move the page back onto the end of
-                                * the queue so that statistics are more correct if we don't.
+                                * The page may have been busied during the
+                                * blocking in vput();  We don't move the
+                                * page back onto the end of the queue so that
+                                * statistics are more correct if we don't.
                                 */
                                if (m->busy || (m->flags & PG_BUSY)) {
                                        vput(vp);
@@ -910,13 +952,27 @@
                         * If a page is dirty, then it is either being washed
                         * (but not yet cleaned) or it is still in the
                         * laundry.  If it is still in the laundry, then we
-                        * start the cleaning operation.
+                        * start the cleaning operation.  maxlaunder nominally
+                        * counts I/O cost, essentially seeks, so we drop it
+                        * by one no matter how large a cluster
+                        * vm_pageout_clean() is able to put together.
+                        *
+                        * This operation may cluster-out, causing the 'next'
+                        * page to move to another queue.  To avoid losing our
+                        * place we insert a placemarker, then recalculate
+                        * next after vm_pageout_clean() returns.
                         */
-                       written = vm_pageout_clean(m);
-                       if (vp)
+                       s = splvm();
+                       TAILQ_INSERT_AFTER(&vm_page_queues[PQ_INACTIVE].pl, m, &marker, pageq);
+                       splx(s);
+                       if (vm_pageout_clean(m) != 0)
+                               --maxlaunder;
+                       s = splvm();
+                       next = TAILQ_NEXT(&marker, pageq);
+                       TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, &marker, pageq);
+                       splx(s);
+                       if (vp != NULL)
                                vput(vp);
-
-                       maxlaunder -= written;
                }
        }
 
@@ -930,6 +986,7 @@
                maxlaunder = 
                    (cnt.v_inactive_target > max_page_launder) ?
                    max_page_launder : cnt.v_inactive_target;
+               ++vm_pageout_stats_xtralaunder;
                goto rescan0;
        }
 

