On Thu, 27 Sep 2007 15:21:21 -0700
Andrew Morton <[EMAIL PROTECTED]> wrote:

> > Nope, sc.nr_io_pages will also be incremented when the code runs into
> > pages that are already PageWriteback.
> 
> yup, I didn't think of that.  Hopefully someone else will be in there
> working on that zone too.  If this caller yields and defers to kswapd
> then that's very likely.  Except we just took away the ability to do that..

                if (PageDirty(page)) {
                        if (sc->order <= PAGE_ALLOC_COSTLY_ORDER && referenced)
                                goto keep_locked;
                        if (!may_enter_fs)
                                goto keep_locked;

I think we can fix that problem by adding a sc->nr_io_pages++
between the last if and the goto keep_locked in shrink_page_list.

That way !GFP_IO or !GFP_FS tasks will cause themselves to sleep
if there are pages that need to be written out, even if those
pages are not in flight to disk yet.

I have also added the comment you wanted.

Signed-off-by: Rik van Riel <[EMAIL PROTECTED]>

diff -up linux-2.6.23-rc7/mm/vmscan.c.wait linux-2.6.22/mm/vmscan.c
--- linux-2.6.23-rc7/mm/vmscan.c.wait   2007-09-27 18:45:57.000000000 -0400
+++ linux-2.6.23-rc7/mm/vmscan.c        2007-09-27 18:48:43.000000000 -0400
@@ -68,6 +68,13 @@ struct scan_control {
        int all_unreclaimable;
 
        int order;
+
+       /*
+        * Pages that have (or should have) IO pending.  If we run into
+        * a lot of these, we're better off waiting a little for IO to
+        * finish rather than scanning more pages in the VM.
+        */
+       int nr_io_pages;
 };
 
 #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
@@ -489,8 +496,10 @@ static unsigned long shrink_page_list(st
                         */
                        if (sync_writeback == PAGEOUT_IO_SYNC && may_enter_fs)
                                wait_on_page_writeback(page);
-                       else
+                       else {
+                               sc->nr_io_pages++;
                                goto keep_locked;
+                       }
                }
 
                referenced = page_referenced(page, 1);
@@ -529,8 +538,10 @@ static unsigned long shrink_page_list(st
                if (PageDirty(page)) {
                        if (sc->order <= PAGE_ALLOC_COSTLY_ORDER && referenced)
                                goto keep_locked;
-                       if (!may_enter_fs)
+                       if (!may_enter_fs) {
+                               sc->nr_io_pages++;
                                goto keep_locked;
+                       }
                        if (!sc->may_writepage)
                                goto keep_locked;
 
@@ -541,8 +552,10 @@ static unsigned long shrink_page_list(st
                        case PAGE_ACTIVATE:
                                goto activate_locked;
                        case PAGE_SUCCESS:
-                               if (PageWriteback(page) || PageDirty(page))
+                               if (PageWriteback(page) || PageDirty(page)) {
+                                       sc->nr_io_pages++;
                                        goto keep;
+                               }
                                /*
                                 * A synchronous write - probably a ramdisk.  Go
                                 * ahead and try to reclaim the page.
@@ -1201,6 +1214,7 @@ unsigned long try_to_free_pages(struct z
 
        for (priority = DEF_PRIORITY; priority >= 0; priority--) {
                sc.nr_scanned = 0;
+               sc.nr_io_pages = 0;
                if (!priority)
                        disable_swap_token();
                nr_reclaimed += shrink_zones(priority, zones, &sc);
@@ -1229,7 +1243,8 @@ unsigned long try_to_free_pages(struct z
                }
 
                /* Take a nap, wait for some writeback to complete */
-               if (sc.nr_scanned && priority < DEF_PRIORITY - 2)
+               if (sc.nr_scanned && priority < DEF_PRIORITY - 2 &&
+                               sc.nr_io_pages > sc.swap_cluster_max)
                        congestion_wait(WRITE, HZ/10);
        }
        /* top priority shrink_caches still had more to do? don't OOM, then */
@@ -1315,6 +1330,7 @@ loop_again:
                if (!priority)
                        disable_swap_token();
 
+               sc.nr_io_pages = 0;
                all_zones_ok = 1;
 
                /*
@@ -1398,7 +1414,8 @@ loop_again:
                 * OK, kswapd is getting into trouble.  Take a nap, then take
                 * another pass across the zones.
                 */
-               if (total_scanned && priority < DEF_PRIORITY - 2)
+               if (total_scanned && priority < DEF_PRIORITY - 2 &&
+                                       sc.nr_io_pages > sc.swap_cluster_max)
                        congestion_wait(WRITE, HZ/10);
 
                /*
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to