Hi

This patch improves page reclaim by throttling the number of tasks that
may reclaim from the same zone in parallel.

o previous discussion:
        http://marc.info/?l=linux-mm&m=120339997125985&w=2

o test method
  $ ./hackbench 120 process 1000

o test result (average of 5 measurements)

limit   hackbench     sys-time     major-fault   max-spent-time 
        time(s)       (s)                        in shrink_zone()
                                                 (jiffies)
--------------------------------------------------------------------
3       42.06         378.70       5336          6306


o why parallel reclaim is restricted to 3 tasks per zone

We tested various values for the limit.
  - A limit of 1 gives the fewest major faults,
    but the worst max spent time.
  - A limit of 3 gives the best max spent reclaim time and hackbench result.

I think a limit of 3 gives the best overall behavior.


limit      hackbench     sys-time     major-fault   max-spent-time 
           time(s)       (s)                        in shrink_zone()
                                                    (jiffies)
--------------------------------------------------------------------
1          48.50         283.89       3690          9057
2          44.43         350.94       5245          7159
3          42.06         378.70       5336          6306
4          48.84         401.87       5474          6669
unlimited  282.30        1248.47      29026          -



Any comments are welcome!



Signed-off-by: KOSAKI Motohiro <[EMAIL PROTECTED]>
CC: KAMEZAWA Hiroyuki <[EMAIL PROTECTED]>
CC: Balbir Singh <[EMAIL PROTECTED]>
CC: Rik van Riel <[EMAIL PROTECTED]>
CC: Lee Schermerhorn <[EMAIL PROTECTED]>
CC: Nick Piggin <[EMAIL PROTECTED]>


---
 include/linux/mmzone.h |    3 +
 mm/page_alloc.c        |    4 +
 mm/vmscan.c            |  101 ++++++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 99 insertions(+), 9 deletions(-)

Index: b/include/linux/mmzone.h
===================================================================
--- a/include/linux/mmzone.h    2008-02-25 21:37:49.000000000 +0900
+++ b/include/linux/mmzone.h    2008-02-26 10:12:12.000000000 +0900
@@ -335,6 +335,9 @@ struct zone {
        unsigned long           spanned_pages;  /* total size, including holes 
*/
        unsigned long           present_pages;  /* amount of memory (excluding 
holes) */
 
+
+       atomic_t                nr_reclaimers;
+       wait_queue_head_t       reclaim_throttle_waitq;
        /*
         * rarely used fields:
         */
Index: b/mm/page_alloc.c
===================================================================
--- a/mm/page_alloc.c   2008-02-25 21:37:49.000000000 +0900
+++ b/mm/page_alloc.c   2008-02-26 10:12:12.000000000 +0900
@@ -3466,6 +3466,10 @@ static void __meminit free_area_init_cor
                zone->nr_scan_inactive = 0;
                zap_zone_vm_stats(zone);
                zone->flags = 0;
+
+               zone->nr_reclaimers = ATOMIC_INIT(0);
+               init_waitqueue_head(&zone->reclaim_throttle_waitq);
+
                if (!size)
                        continue;
 
Index: b/mm/vmscan.c
===================================================================
--- a/mm/vmscan.c       2008-02-25 21:37:49.000000000 +0900
+++ b/mm/vmscan.c       2008-02-26 10:59:38.000000000 +0900
@@ -1252,6 +1252,55 @@ static unsigned long shrink_zone(int pri
        return nr_reclaimed;
 }
 
+
+#define RECLAIM_LIMIT (3)
+
+static int do_shrink_zone_throttled(int priority, struct zone *zone,
+                                   struct scan_control *sc,
+                                   unsigned long *ret_reclaimed)
+{
+       unsigned long start_time = jiffies; /* same clock as time_after() */
+       int ret = 0;
+
+       /*
+        * Take one of RECLAIM_LIMIT reclaim slots.  A global, non-kswapd task
+        * that slept > HZ/10 returns -EAGAIN if 4*pages_high is already met.
+        */
+       wait_event(zone->reclaim_throttle_waitq,
+                  atomic_add_unless(&zone->nr_reclaimers, 1, RECLAIM_LIMIT));
+
+       if (scan_global_lru(sc) &&
+           !(current->flags & PF_KSWAPD) &&
+           time_after(jiffies, start_time + HZ/10) &&
+           zone_watermark_ok(zone, sc->order, 4*zone->pages_high,
+                             MAX_NR_ZONES-1, 0)) {
+               ret = -EAGAIN;
+               goto out;
+       }
+
+       *ret_reclaimed += shrink_zone(priority, zone, sc);
+
+out:
+       atomic_dec(&zone->nr_reclaimers);
+       wake_up_all(&zone->reclaim_throttle_waitq);
+
+       return ret;
+}
+
+static unsigned long shrink_zone_throttled(int priority, struct zone *zone,
+                                          struct scan_control *sc)
+{
+       unsigned long reclaimed = 0;
+       int rc = do_shrink_zone_throttled(priority, zone, sc, &reclaimed);
+
+       /*
+        * -EAGAIN: shrink_zone() was skipped, so report 1 rather than 0.
+        */
+       if (rc == -EAGAIN)
+               return 1;
+       return reclaimed;
+}
+
 /*
  * This is the direct reclaim path, for page-allocating processes.  We only
  * try to reclaim pages from zones which will satisfy the caller's allocation
@@ -1268,12 +1317,11 @@ static unsigned long shrink_zone(int pri
  * If a zone is deemed to be full of pinned pages then just give it a light
  * scan then give up on it.
  */
-static unsigned long shrink_zones(int priority, struct zone **zones,
-                                       struct scan_control *sc)
+static int shrink_zones(int priority, struct zone **zones,
+                       struct scan_control *sc, unsigned long *ret_reclaimed)
 {
-       unsigned long nr_reclaimed = 0;
        int i;
-
+       int ret;
 
        sc->all_unreclaimable = 1;
        for (i = 0; zones[i] != NULL; i++) {
@@ -1304,10 +1352,15 @@ static unsigned long shrink_zones(int pr
                                                        priority);
                }
 
-               nr_reclaimed += shrink_zone(priority, zone, sc);
+               ret = do_shrink_zone_throttled(priority, zone, sc,
+                                              ret_reclaimed);
+               if (ret == -EAGAIN)
+                       goto out;
        }
+       ret = 0;
 
-       return nr_reclaimed;
+out:
+       return ret;
 }
  
 /*
@@ -1333,6 +1386,9 @@ static unsigned long do_try_to_free_page
        struct reclaim_state *reclaim_state = current->reclaim_state;
        unsigned long lru_pages = 0;
        int i;
+       unsigned long start_time = jiffies;
+       unsigned long last_check_time = jiffies;
+       int err;
 
        if (scan_global_lru(sc))
                count_vm_event(ALLOCSTALL);
@@ -1356,7 +1412,12 @@ static unsigned long do_try_to_free_page
                sc->nr_io_pages = 0;
                if (!priority)
                        disable_swap_token();
-               nr_reclaimed += shrink_zones(priority, zones, sc);
+               err = shrink_zones(priority, zones, sc, &nr_reclaimed);
+               if (err == -EAGAIN) {
+                       ret = 1;
+                       goto out;
+               }
+
                /*
                 * Don't shrink slabs when reclaiming memory from
                 * over limit cgroups
@@ -1389,8 +1450,28 @@ static unsigned long do_try_to_free_page
 
                /* Take a nap, wait for some writeback to complete */
                if (sc->nr_scanned && priority < DEF_PRIORITY - 2 &&
-                               sc->nr_io_pages > sc->swap_cluster_max)
+                   sc->nr_io_pages > sc->swap_cluster_max) {
                        congestion_wait(WRITE, HZ/10);
+
+               }
+
+               if (scan_global_lru(sc) &&
+                   time_after(jiffies, start_time+HZ) &&
+                   time_after(jiffies, last_check_time+HZ/10)) {
+                       last_check_time = jiffies;
+
+                       /* more reclaim until needed? */
+                       for (i = 0; zones[i] != NULL; i++) {
+                               struct zone *zone = zones[i];
+
+                               if (zone_watermark_ok(zone, sc->order,
+                                                     4*zone->pages_high,
+                                                     zone_idx(zones[0]), 0)) {
+                                       ret = 1;
+                                       goto out;
+                               }
+                       }
+               }
        }
        /* top priority shrink_caches still had more to do? don't OOM, then */
        if (!sc->all_unreclaimable && scan_global_lru(sc))
@@ -1588,7 +1669,9 @@ loop_again:
                         */
                        if (!zone_watermark_ok(zone, order, 8*zone->pages_high,
                                                end_zone, 0))
-                               nr_reclaimed += shrink_zone(priority, zone, 
&sc);
+                               nr_reclaimed += shrink_zone_throttled(priority,
+                                                                     zone,
+                                                                     &sc);
                        reclaim_state->reclaimed_slab = 0;
                        nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
                                                lru_pages);




--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to