This is mostly a copy of the page cache implementation: record refault
information (a shadow entry in the swap cache radix tree) when a page is
swapped out, and read it back on swap-in.
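
A condensed sketch of the new call pattern, lifted from the mm/vmscan.c and
mm/swap_state.c hunks below (locking and error handling elided):

    /* swap-out path (__remove_mapping): remember the eviction */
    shadow = workingset_eviction(mapping, page);
    __delete_from_swap_cache(page, shadow);

    /* swap-in path (__read_swap_cache_async): consume the shadow entry */
    err = __add_to_swap_cache(new_page, entry, &shadow);
    if (!err && shadow && workingset_refault(shadow)) {
            SetPageActive(new_page);
            workingset_activation(new_page);
    }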

https://pmc.acronis.com/browse/VSTOR-19037
Signed-off-by: Andrey Ryabinin <aryabi...@virtuozzo.com>
---
 drivers/staging/zcache/zcache-main.c |   2 +-
 include/linux/swap.h                 |  10 +--
 mm/shmem.c                           |   2 +-
 mm/swap_state.c                      | 123 ++++++++++++++++++++++++---
 mm/swapfile.c                        |   2 +-
 mm/tswap.c                           |   2 +-
 mm/vmscan.c                          |   6 +-
 mm/workingset.c                      |   3 +-
 8 files changed, 125 insertions(+), 25 deletions(-)

diff --git a/drivers/staging/zcache/zcache-main.c b/drivers/staging/zcache/zcache-main.c
index 01e8446b04d0..732be2143e64 100644
--- a/drivers/staging/zcache/zcache-main.c
+++ b/drivers/staging/zcache/zcache-main.c
@@ -948,7 +948,7 @@ static int zcache_get_swap_cache_page(int type, pgoff_t offset,
                /* May fail (-ENOMEM) if radix-tree node allocation failed. */
                __set_page_locked(new_page);
                SetPageSwapBacked(new_page);
-               err = __add_to_swap_cache(new_page, entry);
+               err = __add_to_swap_cache(new_page, entry, NULL);
                if (likely(!err)) {
                        radix_tree_preload_end();
                        lru_cache_add_anon(new_page);
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 7797cb88870b..2985b5f90ce5 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -455,9 +455,9 @@ extern struct address_space *swapper_spaces[];
 extern unsigned long total_swapcache_pages(void);
 extern void show_swap_cache_info(void);
 extern int add_to_swap(struct page *, struct list_head *list);
-extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t);
-extern int __add_to_swap_cache(struct page *page, swp_entry_t entry);
-extern void __delete_from_swap_cache(struct page *);
+extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t, void **);
+extern int __add_to_swap_cache(struct page *page, swp_entry_t entry, void **shadow);
+extern void __delete_from_swap_cache(struct page *, void *shadow);
 extern void delete_from_swap_cache(struct page *);
 extern void free_page_and_swap_cache(struct page *);
 extern void free_pages_and_swap_cache(struct page **, int);
@@ -592,12 +592,12 @@ static inline int add_to_swap(struct page *page, struct list_head *list)
 }
 
 static inline int add_to_swap_cache(struct page *page, swp_entry_t entry,
-                                                       gfp_t gfp_mask)
+                               gfp_t gfp_mask, void **shadow)
 {
        return -1;
 }
 
-static inline void __delete_from_swap_cache(struct page *page)
+static inline void __delete_from_swap_cache(struct page *page, void *shadow)
 {
 }
 
diff --git a/mm/shmem.c b/mm/shmem.c
index cda801a5496b..b25e1423d407 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -995,7 +995,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
        if (list_empty(&info->swaplist))
                list_add_tail(&info->swaplist, &shmem_swaplist);
 
-       if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
+       if (add_to_swap_cache(page, swap, GFP_ATOMIC, NULL) == 0) {
                spin_lock(&info->lock);
                shmem_recalc_inode(inode);
                info->swapped++;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 83e48a7edb28..3931364e78a3 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -91,10 +91,12 @@ void show_swap_cache_info(void)
  * __add_to_swap_cache resembles add_to_page_cache_locked on swapper_space,
  * but sets SwapCache flag and private instead of mapping and index.
  */
-int __add_to_swap_cache(struct page *page, swp_entry_t entry)
+int __add_to_swap_cache(struct page *page, swp_entry_t entry, void **shadow)
 {
        int error;
+       void **slot;
        struct address_space *address_space;
+       struct radix_tree_node *node;
 
        VM_BUG_ON_PAGE(!PageLocked(page), page);
        VM_BUG_ON_PAGE(PageSwapCache(page), page);
@@ -106,13 +108,46 @@ int __add_to_swap_cache(struct page *page, swp_entry_t entry)
 
        address_space = swap_address_space(entry);
        spin_lock_irq(&address_space->tree_lock);
-       error = radix_tree_insert(&address_space->page_tree,
-                                       entry.val, page);
-       if (likely(!error)) {
-               address_space->nrpages++;
-               __inc_zone_page_state(page, NR_FILE_PAGES);
-               INC_CACHE_INFO(add_total);
+       error = __radix_tree_create(&address_space->page_tree, entry.val, 0,
+                               &node, &slot);
+       if (error)
+               goto out;
+       if (*slot) {
+               void *p;
+
+               p = radix_tree_deref_slot_protected(slot,
+                                               &address_space->tree_lock);
+               if (!radix_tree_very_exceptional_entry(p)) {
+                       error = -EEXIST;
+                       goto out;
+               }
+
+               address_space->nrexceptional--;
+               if (shadow)
+                       *shadow = p;
+               if (node)
+                       workingset_node_shadows_dec(node);
        }
+       radix_tree_replace_slot(slot, page);
+       address_space->nrpages++;
+       __inc_zone_page_state(page, NR_FILE_PAGES);
+       INC_CACHE_INFO(add_total);
+       if (node) {
+               workingset_node_pages_inc(node);
+               /*
+                * Don't track node that contains actual pages.
+                *
+                * Avoid acquiring the list_lru lock if already
+                * untracked.  The list_empty() test is safe as
+                * node->private_list is protected by
+                * mapping->tree_lock.
+                */
+               if (!list_empty(&node->private_list))
+                       list_lru_del(&workingset_shadow_nodes,
+                                    &node->private_list);
+       }
+
+out:
        spin_unlock_irq(&address_space->tree_lock);
 
        if (unlikely(error)) {
@@ -131,23 +166,78 @@ int __add_to_swap_cache(struct page *page, swp_entry_t entry)
 }
 
 
-int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
+int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask,
+               void **shadow)
 {
        int error;
 
        error = radix_tree_maybe_preload(gfp_mask);
        if (!error) {
-               error = __add_to_swap_cache(page, entry);
+               error = __add_to_swap_cache(page, entry, shadow);
                radix_tree_preload_end();
        }
        return error;
 }
+static void page_swap_cache_delete(struct address_space *mapping,
+                                  struct page *page, void *shadow)
+{
+       struct radix_tree_node *node;
+       void **slot;
+
+       VM_BUG_ON(!PageLocked(page));
+
+       __radix_tree_lookup(&mapping->page_tree, page_private(page), &node, &slot);
+       radix_tree_clear_tags(&mapping->page_tree, node, slot);
+
+       if (!node) {
+               /*
+                * We need a node to properly account shadow
+                * entries. Don't plant any without. XXX
+                */
+               shadow = NULL;
+       }
+
+       radix_tree_replace_slot(slot, shadow);
+
+       if (shadow) {
+               mapping->nrexceptional++;
+               /*
+                * Make sure the nrexceptional update is committed before
+                * the nrpages update so that final truncate racing
+                * with reclaim does not see both counters 0 at the
+                * same time and miss a shadow entry.
+                */
+               smp_wmb();
+       }
+
+       if (!node)
+               return;
+
+       workingset_node_pages_dec(node);
+       if (shadow)
+               workingset_node_shadows_inc(node);
+       else
+               if (__radix_tree_delete_node(&mapping->page_tree, node))
+                       return;
+
+       /*
+        * Track node that only contains shadow entries.
+        *
+        * Avoid acquiring the list_lru lock if already tracked.  The
+        * list_empty() test is safe as node->private_list is
+        * protected by mapping->tree_lock.
+        */
+       if (!workingset_node_pages(node) && list_empty(&node->private_list)) {
+               node->private_data = mapping;
+               list_lru_add(&workingset_shadow_nodes, &node->private_list);
+       }
+}
 
 /*
  * This must be called only on pages that have
  * been verified to be in the swap cache.
  */
-void __delete_from_swap_cache(struct page *page)
+void __delete_from_swap_cache(struct page *page, void *shadow)
 {
        swp_entry_t entry;
        struct address_space *address_space;
@@ -158,7 +248,7 @@ void __delete_from_swap_cache(struct page *page)
 
        entry.val = page_private(page);
        address_space = swap_address_space(entry);
-       radix_tree_delete(&address_space->page_tree, page_private(page));
+       page_swap_cache_delete(address_space, page, shadow);
        set_page_private(page, 0);
        ClearPageSwapCache(page);
        address_space->nrpages--;
@@ -203,7 +293,7 @@ int add_to_swap(struct page *page, struct list_head *list)
         * Add it to the swap cache.
         */
        err = add_to_swap_cache(page, entry,
-                       __GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN);
+                       __GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN, NULL);
 
        if (!err) {
                return 1;
@@ -232,7 +322,7 @@ void delete_from_swap_cache(struct page *page)
 
        address_space = swap_address_space(entry);
        spin_lock_irq(&address_space->tree_lock);
-       __delete_from_swap_cache(page);
+       __delete_from_swap_cache(page, NULL);
        spin_unlock_irq(&address_space->tree_lock);
 
        swapcache_free(entry);
@@ -323,6 +413,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 {
        struct page *found_page, *new_page = NULL;
        struct address_space *swapper_space = swap_address_space(entry);
+       void *shadow = NULL;
        int err;
        *new_page_allocated = false;
 
@@ -395,9 +486,13 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
                /* May fail (-ENOMEM) if radix-tree node allocation failed. */
                __set_page_locked(new_page);
                SetPageSwapBacked(new_page);
-               err = __add_to_swap_cache(new_page, entry);
+               err = __add_to_swap_cache(new_page, entry, &shadow);
                if (likely(!err)) {
                        radix_tree_preload_end();
+                       if (shadow && workingset_refault(shadow)) {
+                               SetPageActive(new_page);
+                               workingset_activation(new_page);
+                       }
                        /*
                         * Initiate read into locked page and return.
                         */
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 14043e6bf776..ffc3981c8c60 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1208,7 +1208,7 @@ int reuse_swap_page(struct page *page)
 
                        address_space = swap_address_space(entry);
                        spin_lock_irq(&address_space->tree_lock);
-                       __delete_from_swap_cache(page);
+                       __delete_from_swap_cache(page, NULL);
                        spin_unlock_irq(&address_space->tree_lock);
 
                        /* the page is still in use, do not uncharge */
diff --git a/mm/tswap.c b/mm/tswap.c
index 112a13d223d6..8b18bd17afcf 100644
--- a/mm/tswap.c
+++ b/mm/tswap.c
@@ -213,7 +213,7 @@ static int tswap_evict_page(struct page *page)
                goto out_free_swapcache;
 
        SetPageSwapBacked(page);
-       err = __add_to_swap_cache(page, entry);
+       err = __add_to_swap_cache(page, entry, NULL);
        if (err) {
                ClearPageSwapBacked(page);
                /* __add_to_swap_cache clears page->private on failure */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 583ba1abfc44..fe034747bb31 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -781,8 +781,12 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
 
        if (PageSwapCache(page)) {
                swp_entry_t swap = { .val = page_private(page) };
+               void *shadow = NULL;
+
                mem_cgroup_swapout(page, swap);
-               __delete_from_swap_cache(page);
+
+               shadow = workingset_eviction(mapping, page);
+               __delete_from_swap_cache(page, shadow);
                spin_unlock_irq(&mapping->tree_lock);
                swapcache_free(swap);
        } else {
diff --git a/mm/workingset.c b/mm/workingset.c
index 0b4cf96bb026..46865ad551ce 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -275,7 +275,8 @@ bool workingset_refault(void *shadow)
        }
        lruvec = mem_cgroup_zone_lruvec(zone, memcg);
        refault = atomic_long_read(&lruvec->inactive_age);
-       active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE);
+       active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE) +
+               lruvec_lru_size(lruvec, LRU_ACTIVE_ANON);
        rcu_read_unlock();
 
        /*
-- 
2.19.2
