Implement an approximation to Song Jiang's CLOCK-Pro page replacement
algorithm.  The algorithm has been extended to handle multiple memory
zones, which required some changes to how the active page limit is
readjusted.
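
For illustration only, here is a minimal userspace sketch of the
refault-distance test that page_is_hot() performs below.  The fake_zone
structure, the helper name and the sample numbers are invented for this
example; the real code uses the counters added to struct zone plus a
per-CPU eviction count, and also decays those counters over time:

/*
 * Illustrative sketch of the CLOCK-Pro hotness test (not kernel code).
 * All names and numbers here are made up for demonstration.
 */
#include <stdio.h>

struct fake_zone {
	unsigned long nr_active;	/* pages on the active list */
	unsigned long active_scanned;	/* active pages scanned so far */
	unsigned long active_limit;	/* target size of the active list */
	unsigned long present_pages;
};

/*
 * A refaulting page is considered hot when its estimated
 * inter-reference distance is smaller than that of the oldest
 * page on the active list:
 *
 *  distance        nr_active
 * ---------- < ----------------      (cross-multiplied below to
 *  evicted       active_scanned       avoid division and rounding)
 */
static int page_is_hot_sketch(struct fake_zone *zone,
			      unsigned long pageable_memory,
			      unsigned long refault_distance,
			      unsigned long evicted)
{
	unsigned long long distance = (unsigned long long)pageable_memory +
				      refault_distance;

	if (distance * zone->active_scanned <
			(unsigned long long)evicted * zone->nr_active) {
		/* Hot: shrink the active list target to make room. */
		if (zone->active_limit > zone->present_pages / 8)
			zone->active_limit--;
		return 1;
	}
	return 0;
}

int main(void)
{
	struct fake_zone zone = {
		.nr_active	= 60000,
		.active_scanned	= 10000,
		.active_limit	= 87000,
		.present_pages	= 131072,
	};

	/* A page refaulting soon after eviction (small distance) tests hot ... */
	printf("short refault: %d\n",
	       page_is_hot_sketch(&zone, 100000, 2000, 40000));
	/* ... while a page evicted long ago does not. */
	printf("long refault:  %d\n",
	       page_is_hot_sketch(&zone, 100000, 900000, 40000));
	return 0;
}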

TODO:
 - verify that things work as expected
 - figure out where to put new anonymous pages

More information can be found at:
 - http://www.cs.wm.edu/hpcs/WWW/HTML/publications/abs05-3.html
 - http://linux-mm.org/wiki/ClockProApproximation

Signed-off-by: Rik van Riel <[EMAIL PROTECTED]>

Index: linux-2.6.12-vm/include/linux/mmzone.h
===================================================================
--- linux-2.6.12-vm.orig/include/linux/mmzone.h
+++ linux-2.6.12-vm/include/linux/mmzone.h
@@ -143,6 +143,8 @@ struct zone {
        unsigned long           nr_inactive;
        unsigned long           pages_scanned;     /* since last reclaim */
        int                     all_unreclaimable; /* All pages pinned */
+       unsigned long           active_limit;
+       unsigned long           active_scanned;
 
        /*
         * prev_priority holds the scanning priority for this zone.  It is
Index: linux-2.6.12-vm/include/linux/swap.h
===================================================================
--- linux-2.6.12-vm.orig/include/linux/swap.h
+++ linux-2.6.12-vm/include/linux/swap.h
@@ -154,10 +154,15 @@ extern void out_of_memory(unsigned int _
 extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *);
 
 /* linux/mm/nonresident.c */
-extern int remember_page(struct address_space *, unsigned long);
+extern int do_remember_page(struct address_space *, unsigned long);
 extern int recently_evicted(struct address_space *, unsigned long);
 extern void init_nonresident(void);
 
+/* linux/mm/clockpro.c */
+extern void remember_page(struct page *, struct address_space *, unsigned long);
+extern int page_is_hot(struct page *, struct address_space *, unsigned long);
+DECLARE_PER_CPU(unsigned long, evicted_pages);
+
 /* linux/mm/page_alloc.c */
 extern unsigned long totalram_pages;
 extern unsigned long totalhigh_pages;
@@ -298,6 +303,9 @@ static inline swp_entry_t get_swap_page(
 #define remember_page(x,y)     0
 #define recently_evicted(x,y)  0
 
+/* linux/mm/clockpro.c */
+#define page_is_hot(x,y,z)     0
+
 #endif /* CONFIG_SWAP */
 #endif /* __KERNEL__*/
 #endif /* _LINUX_SWAP_H */
Index: linux-2.6.12-vm/mm/Makefile
===================================================================
--- linux-2.6.12-vm.orig/mm/Makefile
+++ linux-2.6.12-vm/mm/Makefile
@@ -13,7 +13,7 @@ obj-y                 := bootmem.o filemap.o mempool.o
                           prio_tree.o $(mmu-y)
 
 obj-$(CONFIG_SWAP)     += page_io.o swap_state.o swapfile.o thrash.o \
-                          nonresident.o
+                          nonresident.o clockpro.o
 obj-$(CONFIG_HUGETLBFS)        += hugetlb.o
 obj-$(CONFIG_NUMA)     += mempolicy.o
 obj-$(CONFIG_SHMEM) += shmem.o
Index: linux-2.6.12-vm/mm/clockpro.c
===================================================================
--- /dev/null
+++ linux-2.6.12-vm/mm/clockpro.c
@@ -0,0 +1,102 @@
+/*
+ * mm/clockpro.c
+ * (C) 2005 Red Hat, Inc
+ * Written by Rik van Riel <[EMAIL PROTECTED]>
+ * Released under the GPL, see the file COPYING for details.
+ *
+ * Helper functions to implement CLOCK-Pro page replacement policy.
+ * For details see: http://linux-mm.org/wiki/AdvancedPageReplacement
+ */
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/swap.h>
+
+DEFINE_PER_CPU(unsigned long, evicted_pages);
+static unsigned long get_evicted(void)
+{
+       unsigned long total = 0;
+       int cpu;
+
+       for (cpu = first_cpu(cpu_online_map); cpu < NR_CPUS; cpu++)
+               total += per_cpu(evicted_pages, cpu);
+
+       return total;
+}
+
+static unsigned long estimate_pageable_memory(void)
+{
+       static unsigned long next_check;
+       static unsigned long total;
+       unsigned long active, inactive, free;
+
+       if (time_after(jiffies, next_check)) {
+               get_zone_counts(&active, &inactive, &free);
+               total = active + inactive + free;
+               next_check = jiffies + HZ/10;
+       }
+
+       return total;
+}
+
+static void decay_clockpro_variables(void)
+{
+       struct zone * zone;
+       int cpu;
+
+       for (cpu = first_cpu(cpu_online_map); cpu < NR_CPUS; cpu++)
+               per_cpu(evicted_pages, cpu) /= 2;
+
+       for_each_zone(zone)
+               zone->active_scanned /= 2;
+}
+
+int page_is_hot(struct page * page, struct address_space * mapping,
+               unsigned long index)
+{
+       unsigned long long distance;
+       unsigned long long evicted;
+       int refault_distance;
+       struct zone *zone;
+
+       /* Was the page recently evicted ? */
+       refault_distance = recently_evicted(mapping, index);
+       if (refault_distance < 0)
+               return 0;
+
+       distance = estimate_pageable_memory() + refault_distance;
+       evicted = get_evicted();
+       zone = page_zone(page);
+
+       /* Only consider recent history for the calculation below. */
+       if (unlikely(evicted > distance))
+               decay_clockpro_variables();
+
+       /*
+        * Estimate whether the inter-reference distance of the tested
+        * page is smaller than the inter-reference distance of the
+        * oldest page on the active list.
+        *
+        *  distance        zone->nr_active
+        * ---------- <  ----------------------
+        *  evicted       zone->active_scanned
+        */
+       if (distance * zone->active_scanned < evicted * zone->nr_active) {
+               if (zone->active_limit > zone->present_pages / 8)
+                       zone->active_limit--;
+               return 1;
+       }
+
+       /* Increase the active limit more slowly. */
+       if ((evicted & 1) && zone->active_limit < zone->present_pages * 7 / 8)
+               zone->active_limit++;
+       return 0;
+}
+
+void remember_page(struct page * page, struct address_space * mapping,
+               unsigned long index)
+{
+       struct zone * zone = page_zone(page);
+       if (do_remember_page(mapping, index) && (index & 1) &&
+                       zone->active_limit < zone->present_pages * 7 / 8)
+               zone->active_limit++;
+}
Index: linux-2.6.12-vm/mm/filemap.c
===================================================================
--- linux-2.6.12-vm.orig/mm/filemap.c
+++ linux-2.6.12-vm/mm/filemap.c
@@ -401,9 +401,12 @@ int add_to_page_cache_lru(struct page *p
                                pgoff_t offset, int gfp_mask)
 {
        int ret = add_to_page_cache(page, mapping, offset, gfp_mask);
-       recently_evicted(mapping, offset);
-       if (ret == 0)
-               lru_cache_add(page);
+       if (ret == 0) {
+               if (page_is_hot(page, mapping, offset))
+                       lru_cache_add_active(page);
+               else
+                       lru_cache_add(page);
+       }
        return ret;
 }
 
Index: linux-2.6.12-vm/mm/nonresident.c
===================================================================
--- linux-2.6.12-vm.orig/mm/nonresident.c
+++ linux-2.6.12-vm/mm/nonresident.c
@@ -25,6 +25,7 @@
 #include <linux/prefetch.h>
 #include <linux/kernel.h>
 #include <linux/percpu.h>
+#include <linux/swap.h>
 
 /* Number of non-resident pages per hash bucket. Never smaller than 15. */
 #if (L1_CACHE_BYTES < 64)
@@ -101,7 +102,7 @@ int recently_evicted(struct address_spac
        return -1;
 }
 
-int remember_page(struct address_space * mapping, unsigned long index)
+int do_remember_page(struct address_space * mapping, unsigned long index)
 {
        struct nr_bucket * nr_bucket;
        u32 nrpage;
@@ -125,6 +126,7 @@ int remember_page(struct address_space *
        preempt_enable();
 
        /* Statistics may want to know whether the entry was in use. */
+       __get_cpu_var(evicted_pages)++;
        return xchg(&nr_bucket->page[i], nrpage);
 }
 
Index: linux-2.6.12-vm/mm/page_alloc.c
===================================================================
--- linux-2.6.12-vm.orig/mm/page_alloc.c
+++ linux-2.6.12-vm/mm/page_alloc.c
@@ -1715,6 +1715,7 @@ static void __init free_area_init_core(s
                zone->nr_scan_inactive = 0;
                zone->nr_active = 0;
                zone->nr_inactive = 0;
+               zone->active_limit = zone->present_pages * 2 / 3;
                if (!size)
                        continue;
 
Index: linux-2.6.12-vm/mm/swap_state.c
===================================================================
--- linux-2.6.12-vm.orig/mm/swap_state.c
+++ linux-2.6.12-vm/mm/swap_state.c
@@ -323,6 +323,7 @@ struct page *read_swap_cache_async(swp_e
                        struct vm_area_struct *vma, unsigned long addr)
 {
        struct page *found_page, *new_page = NULL;
+       int active;
        int err;
 
        do {
@@ -344,7 +345,7 @@ struct page *read_swap_cache_async(swp_e
                                break;          /* Out of memory */
                }
 
-               recently_evicted(&swapper_space, entry.val);
+               active = page_is_hot(new_page, &swapper_space, entry.val);
 
                /*
                 * Associate the page with swap entry in the swap cache.
@@ -361,7 +362,10 @@ struct page *read_swap_cache_async(swp_e
                        /*
                         * Initiate read into locked page and return.
                         */
-                       lru_cache_add_active(new_page);
+                       if (active)
+                               lru_cache_add_active(new_page);
+                       else
+                               lru_cache_add(new_page);
                        swap_readpage(NULL, new_page);
                        return new_page;
                }
Index: linux-2.6.12-vm/mm/vmscan.c
===================================================================
--- linux-2.6.12-vm.orig/mm/vmscan.c
+++ linux-2.6.12-vm/mm/vmscan.c
@@ -355,12 +355,14 @@ static int shrink_list(struct list_head 
        while (!list_empty(page_list)) {
                struct address_space *mapping;
                struct page *page;
+               struct zone *zone;
                int may_enter_fs;
                int referenced;
 
                cond_resched();
 
                page = lru_to_page(page_list);
+               zone = page_zone(page);
                list_del(&page->lru);
 
                if (TestSetPageLocked(page))
@@ -492,7 +494,7 @@ static int shrink_list(struct list_head 
 #ifdef CONFIG_SWAP
                if (PageSwapCache(page)) {
                        swp_entry_t swap = { .val = page->private };
-                       remember_page(&swapper_space, page->private);
+                       remember_page(page, &swapper_space, page->private);
                        __delete_from_swap_cache(page);
                        write_unlock_irq(&mapping->tree_lock);
                        swap_free(swap);
@@ -501,7 +503,7 @@ static int shrink_list(struct list_head 
                }
 #endif /* CONFIG_SWAP */
 
-               remember_page(page->mapping, page->index);
+               remember_page(page, page->mapping, page->index);
                __remove_from_page_cache(page);
                write_unlock_irq(&mapping->tree_lock);
                __put_page(page);
@@ -684,6 +686,7 @@ refill_inactive_zone(struct zone *zone, 
        pgmoved = isolate_lru_pages(nr_pages, &zone->active_list,
                                    &l_hold, &pgscanned);
        zone->pages_scanned += pgscanned;
+       zone->active_scanned += pgscanned;
        zone->nr_active -= pgmoved;
        spin_unlock_irq(&zone->lru_lock);
 
@@ -799,10 +802,15 @@ shrink_zone(struct zone *zone, struct sc
        unsigned long nr_inactive;
 
        /*
-        * Add one to `nr_to_scan' just to make sure that the kernel will
-        * slowly sift through the active list.
+        * Scan the active list if we have too many active pages.
+        * The limit is automatically adjusted through refaults
+        * measuring how well the VM did in the past.
         */
-       zone->nr_scan_active += (zone->nr_active >> sc->priority) + 1;
+       if (zone->nr_active > zone->active_limit)
+               zone->nr_scan_active += zone->nr_active - zone->active_limit;
+       else if (sc->priority < DEF_PRIORITY - 2)
+               zone->nr_scan_active += (zone->nr_active >> sc->priority) + 1;
+
        nr_active = zone->nr_scan_active;
        if (nr_active >= sc->swap_cluster_max)
                zone->nr_scan_active = 0;

-- 
All Rights Reversed