This patch provides the logic to discard anonymous vranges by
generating the page list for the volatile ranges, setting the ptes
volatile, and discarding the pages.
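
For reviewers, the core of the discard path is the drain loop in
discard_vrange_pagelist(): pages are popped from the tail of the
isolated list, a non-blocking lock is attempted, failures are kept for
putback, and successes are counted and freed.  Below is a minimal
userspace sketch of that pattern; it is only illustrative, and the
names (fake_page, try_discard) are made up rather than taken from the
kernel code.

    #include <stdio.h>

    struct fake_page {                /* stand-in for struct page */
            int id;
            int locked;               /* models a trylock_page() failure */
            struct fake_page *next;
    };

    /* hypothetical stand-in for trylock_page() + discard_vpage() */
    static int try_discard(struct fake_page *p)
    {
            return p->locked ? -1 : 0;
    }

    int main(void)
    {
            struct fake_page pages[] = { {0, 0}, {1, 1}, {2, 0}, {3, 0} };
            struct fake_page *list = NULL, *putback = NULL;
            unsigned int nr_discard = 0;
            int i;

            for (i = 0; i < 4; i++) {         /* build the isolated list */
                    pages[i].next = list;
                    list = &pages[i];
            }

            while (list) {                    /* the drain loop */
                    struct fake_page *p = list;

                    list = p->next;
                    if (try_discard(p)) {     /* "locked": keep for putback */
                            p->next = putback;
                            putback = p;
                            continue;
                    }
                    nr_discard++;             /* freed in the kernel version */
            }

            printf("discarded %u page(s)\n", nr_discard);
            for (; putback; putback = putback->next)
                    printf("putback page %d\n", putback->id);
            return 0;
    }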

Cc: Mel Gorman <m...@csn.ul.ie>
Cc: Hugh Dickins <hu...@google.com>
Cc: Dave Hansen <dave.han...@intel.com>
Cc: Rik van Riel <r...@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motoh...@gmail.com>
Cc: Michel Lespinasse <wal...@google.com>
Cc: Johannes Weiner <han...@cmpxchg.org>
Signed-off-by: John Stultz <john.stu...@linaro.org>
[jstultz: Code tweaks and commit log rewording]
Signed-off-by: Minchan Kim <minc...@kernel.org>
---
 mm/vrange.c |  184 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 183 insertions(+), 1 deletion(-)

diff --git a/mm/vrange.c b/mm/vrange.c
index 4a52b7a05f9a..0fa669c56ab8 100644
--- a/mm/vrange.c
+++ b/mm/vrange.c
@@ -11,6 +11,8 @@
 #include <linux/hugetlb.h>
 #include "internal.h"
 #include <linux/mmu_notifier.h>
+#include <linux/mm_inline.h>
+#include <linux/migrate.h>
 
 static struct kmem_cache *vrange_cachep;
 
@@ -19,6 +21,11 @@ static struct vrange_list {
        spinlock_t lock;
 } vrange_list;
 
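+/* Private walker state handed to walk_page_range() via mm_walk->private */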
+struct vrange_walker {
+       struct vm_area_struct *vma;
+       struct list_head *pagelist;
+};
+
 static inline unsigned long vrange_size(struct vrange *range)
 {
        return range->node.last + 1 - range->node.start;
@@ -682,11 +689,186 @@ static struct vrange *vrange_isolate(void)
        return vrange;
 }
 
-static int discard_vrange(struct vrange *vrange, unsigned long *nr_discard)
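+/*
+ * Take pages off @page_list, try to lock and discard each one and
+ * return the number discarded.  Pages that cannot be locked or
+ * discarded are spliced back onto @page_list for putback.
+ */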
+static unsigned long discard_vrange_pagelist(struct list_head *page_list)
+{
+       struct page *page;
+       unsigned int nr_discard = 0;
+       LIST_HEAD(ret_pages);
+       LIST_HEAD(free_pages);
+
+       while (!list_empty(page_list)) {
+               int err;
+               page = list_entry(page_list->prev, struct page, lru);
+               list_del(&page->lru);
+               if (!trylock_page(page)) {
+                       list_add(&page->lru, &ret_pages);
+                       continue;
+               }
+
+               /*
+                * discard_vpage() returns with the page
+                * unlocked if it succeeds.
+                */
+               err = discard_vpage(page);
+               if (err) {
+                       unlock_page(page);
+                       list_add(&page->lru, &ret_pages);
+                       continue;
+               }
+
+               ClearPageActive(page);
+               list_add(&page->lru, &free_pages);
+               dec_zone_page_state(page, NR_ISOLATED_ANON);
+               nr_discard++;
+       }
+
+       free_hot_cold_page_list(&free_pages, 1);
+       list_splice(&ret_pages, page_list);
+       return nr_discard;
+}
+
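+/*
+ * Per-pte helper: isolate the anonymous LRU page mapped at @address,
+ * if any, onto the walker's private page list for later discard.
+ */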
+static void vrange_pte_entry(pte_t pteval, unsigned long address,
+               unsigned ptent_size, struct mm_walk *walk)
 {
+       struct page *page;
+       struct vrange_walker *vw = walk->private;
+       struct vm_area_struct *vma = vw->vma;
+       struct list_head *pagelist = vw->pagelist;
+
+       if (pte_none(pteval))
+               return;
+
+       if (!pte_present(pteval))
+               return;
+
+       page = vm_normal_page(vma, address, pteval);
+       if (unlikely(!page))
+               return;
+
+       if (!PageLRU(page) || PageLocked(page))
+               return;
+
+       BUG_ON(PageCompound(page));
+
+       if (isolate_lru_page(page))
+               return;
+
+       list_add(&page->lru, pagelist);
+
+       VM_BUG_ON(page_is_file_cache(page));
+       inc_zone_page_state(page, NR_ISOLATED_ANON);
+}
+
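+/*
+ * pmd_entry callback for walk_page_range(): split any transparent huge
+ * pmd, then scan each pte under the page table lock.
+ */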
+static int vrange_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+               struct mm_walk *walk)
+{
+       struct vrange_walker *vw = walk->private;
+       struct vm_area_struct *vma;
+       pte_t *pte;
+       spinlock_t *ptl;
+
+       vma = vw->vma;
+       split_huge_page_pmd(vma, addr, pmd);
+       if (pmd_trans_unstable(pmd))
+               return 0;
+
+       pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+       for (; addr != end; pte++, addr += PAGE_SIZE)
+               vrange_pte_entry(*pte, addr, PAGE_SIZE, walk);
+       pte_unmap_unlock(pte - 1, ptl);
+       cond_resched();
+
        return 0;
 }
 
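+/*
+ * Isolate the pages @vma maps in [start, end), try to discard them and
+ * put anything that was not discarded back on the LRU.  Returns the
+ * number of pages discarded.
+ */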
+static unsigned long discard_vma_pages(struct mm_struct *mm,
+               struct vm_area_struct *vma, unsigned long start,
+               unsigned long end)
+{
+       unsigned long ret = 0;
+       LIST_HEAD(pagelist);
+       struct vrange_walker vw;
+       struct mm_walk vrange_walk = {
+               .pmd_entry = vrange_pte_range,
+               .mm = vma->vm_mm,
+               .private = &vw,
+       };
+
+       vw.pagelist = &pagelist;
+       vw.vma = vma;
+
+       walk_page_range(start, end, &vrange_walk);
+
+       if (!list_empty(&pagelist))
+               ret = discard_vrange_pagelist(&pagelist);
+
+       putback_lru_pages(&pagelist);
+       return ret;
+}
+
+/*
+ * vrange->owner isn't stable because the caller doesn't hold
+ * vrange_lock, so avoid touching vrange->owner.
+ */
+static int __discard_vrange_anon(struct mm_struct *mm, struct vrange *vrange,
+               unsigned long *ret_discard)
+{
+       struct vm_area_struct *vma;
+       unsigned long nr_discard = 0;
+       unsigned long start = vrange->node.start;
+       unsigned long end = vrange->node.last + 1;
+       int ret = 0;
+
+       /* Pin mm_users so the mm isn't torn down while we walk its vmas */
+       if (!atomic_inc_not_zero(&mm->mm_users))
+               return ret;
+
+       if (!down_read_trylock(&mm->mmap_sem)) {
+               mmput(mm);
+               ret = -EAGAIN;
+               goto out; /* this vrange could be retried */
+       }
+
+       vma = find_vma(mm, start);
+       if (!vma || (vma->vm_start >= end))
+               goto out_unlock;
+
+       for (; vma; vma = vma->vm_next) {
+               if (vma->vm_start >= end)
+                       break;
+               BUG_ON(vma->vm_flags & (VM_SPECIAL|VM_LOCKED|VM_MIXEDMAP|
+                                       VM_HUGETLB));
+               cond_resched();
+               nr_discard += discard_vma_pages(mm, vma,
+                               max_t(unsigned long, start, vma->vm_start),
+                               min_t(unsigned long, end, vma->vm_end));
+       }
+out_unlock:
+       up_read(&mm->mmap_sem);
+       mmput(mm);
+       *ret_discard = nr_discard;
+out:
+       return ret;
+}
+
+static int discard_vrange(struct vrange *vrange, unsigned long *nr_discard)
+{
+       int ret = 0;
+       struct mm_struct *mm;
+       struct vrange_root *vroot;
+       vroot = vrange->owner;
+
+       /* TODO: handle VRANGE_FILE */
+       if (vroot->type != VRANGE_MM)
+               goto out;
+
+       mm = vroot->object;
+       ret = __discard_vrange_anon(mm, vrange, nr_discard);
+out:
+       return ret;
+}
+
 #define VRANGE_SCAN_THRESHOLD  (4 << 20)
 
 unsigned long shrink_vrange(enum lru_list lru, struct lruvec *lruvec,
-- 
1.7.9.5
