From: Minchan Kim <[email protected]>

This patch adds the core volatile range shrinking logic
needed to allow volatile range purging to function on
swapless systems.

This patch does not wire in the specific range purging logic,
but that will be added in the following patches.

The reason I use a shrinker is that Dave and Glauber are trying to
make the slab shrinker aware of node/memcg, so if that patchset
reaches mainline, we can also support node/memcg in vrange easily.

Another reason I selected the slab shrinker is that normally it is
called after normal reclaim of file-backed pages (e.g., page cache),
so I expect the reclaim preference to be as follows. (TODO: investigate;
the reclaim path might need more tuning.)

        page cache -> vrange by slab shrinking -> anon page

It does make sense because page cache can have stream data so there is
no point to shrink vrange pages if there are lots of streaming pages
in page cache.

In this version, I haven't checked that it works well, but this is the
design concept, so we can make it work by modifying the page reclaim
path. I will do more experiments.

One disadvantage of using the slab shrinker is that it isn't called
when using memcg, so a memcg-noswap system cannot take advantage of it.
Hmm, maybe I will jump into the reclaim code to hook some point to
control vrange page shrinking more freely.

Cc: Andrew Morton <[email protected]>
Cc: Android Kernel Team <[email protected]>
Cc: Robert Love <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Dmitry Adamushko <[email protected]>
Cc: Dave Chinner <[email protected]>
Cc: Neil Brown <[email protected]>
Cc: Andrea Righi <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Aneesh Kumar K.V <[email protected]>
Cc: Mike Hommey <[email protected]>
Cc: Taras Glek <[email protected]>
Cc: Dhaval Giani <[email protected]>
Cc: Jan Kara <[email protected]>
Cc: KOSAKI Motohiro <[email protected]>
Cc: Michel Lespinasse <[email protected]>
Cc: Rob Clark <[email protected]>
Cc: Minchan Kim <[email protected]>
Cc: [email protected] <[email protected]>
Signed-off-by: Minchan Kim <[email protected]>
[jstultz: Renamed some functions and minor cleanups]
Signed-off-by: John Stultz <[email protected]>
---
 mm/vrange.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 86 insertions(+), 3 deletions(-)

diff --git a/mm/vrange.c b/mm/vrange.c
index 33e3ac1..e7c5a25 100644
--- a/mm/vrange.c
+++ b/mm/vrange.c
@@ -25,11 +25,19 @@ static inline unsigned int vrange_size(struct vrange *range)
        return range->node.last + 1 - range->node.start;
 }
 
+static int shrink_vrange(struct shrinker *s, struct shrink_control *sc);
+
+static struct shrinker vrange_shrinker = {
+       .shrink = shrink_vrange,        /* callback defined later in this file */
+       .seeks = DEFAULT_SEEKS
+};
+
 static int __init vrange_init(void)
 {
        INIT_LIST_HEAD(&vrange_list.list);
        mutex_init(&vrange_list.lock);
        vrange_cachep = KMEM_CACHE(vrange, SLAB_PANIC);
+       register_shrinker(&vrange_shrinker);
        return 0;
 }
 module_init(vrange_init);
@@ -58,9 +66,14 @@ static void __vrange_free(struct vrange *range)
 static inline void __vrange_lru_add(struct vrange *range)
 {
        mutex_lock(&vrange_list.lock);
-       WARN_ON(!list_empty(&range->lru));
-       list_add(&range->lru, &vrange_list.list);
-       vrange_list.size += vrange_size(range);
+       /*
+        * We need this check because it could be raced with
+        * shrink_vrange and vrange_resize
+        */
+       if (list_empty(&range->lru)) {
+               list_add(&range->lru, &vrange_list.list);
+               vrange_list.size += vrange_size(range);
+       }
        mutex_unlock(&vrange_list.lock);
 }
 
@@ -84,6 +97,14 @@ static void __vrange_add(struct vrange *range, struct 
vrange_root *vroot)
        __vrange_lru_add(range);
 }
 
+/* Grab a reference unless refcount is already zero; returns 1 on success. */
+static inline int __vrange_get(struct vrange *vrange)
+{
+       if (atomic_inc_not_zero(&vrange->refcount))
+               return 1;
+       return 0;
+}
+
 static inline void __vrange_put(struct vrange *range)
 {
        if (atomic_dec_and_test(&range->refcount)) {
@@ -647,3 +668,65 @@ int discard_vpage(struct page *page)
 
        return 1;
 }
+
+/* Pop a referenced vrange off the LRU tail; NULL when none can be pinned. */
+static struct vrange *vrange_isolate(void)
+{
+       struct vrange *victim, *found = NULL;
+
+       mutex_lock(&vrange_list.lock);
+       while (!found && !list_empty(&vrange_list.list)) {
+               victim = list_entry(vrange_list.list.prev, struct vrange, lru);
+               list_del_init(&victim->lru);
+               vrange_list.size -= vrange_size(victim);
+
+               /* A failed get means the range is being destroyed: skip it. */
+               if (__vrange_get(victim))
+                       found = victim;
+       }
+
+       mutex_unlock(&vrange_list.lock);
+       return found;
+}
+
+static int discard_vrange(struct vrange *vrange)       /* < 0 means failure */
+{
+       return 0;       /* stub: purging logic is wired in by later patches */
+}
+
+/*
+ * Shrinker callback: nr_to_scan == 0 just reports the LRU size; otherwise
+ * isolate and discard ranges (-1 without __GFP_IO), return the size left.
+ */
+static int shrink_vrange(struct shrinker *s, struct shrink_control *sc)
+{
+       struct vrange *range;
+       unsigned int range_size;
+       long nr_to_scan = sc->nr_to_scan;
+       long size = vrange_list.size;
+
+       if (!nr_to_scan)
+               return size;
+
+       if (!(sc->gfp_mask & __GFP_IO))
+               return -1;
+
+       while (size > 0 && nr_to_scan > 0) {
+               range = vrange_isolate();
+               if (!range)
+                       break;
+
+               /* Read the size first: the put may drop the last reference. */
+               range_size = vrange_size(range);
+               /* An ownerless range is being removed; don't discard it. */
+               if (range->owner && discard_vrange(range) < 0)
+                       __vrange_lru_add(range);
+               __vrange_put(range);
+
+               size -= range_size;
+               nr_to_scan -= range_size;
+       }
+
+       return size;
+}
-- 
1.8.1.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to