[PATCH v10 10/16] vrange: Purging vrange-anon pages from shrinker

2014-01-01 Thread Minchan Kim
This patch provides the logic to discard anonymous vranges by
generating the page list for the volatile ranges, setting the ptes
volatile, and discarding the pages.
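
For reference, a rough sketch of the call flow added here (only names
visible in this patch are shown; discard_vpage() and the shrinker glue
that drives this path are assumed to be provided elsewhere in the
series):

  discard_vma_pages(mm, vma, start, end)
    walk_page_range(start, end, &vrange_walk)
      vrange_pte_range()         /* per pmd: split THP, scan ptes under ptl */
        vrange_pte_entry()       /* isolate mapped anon pages to vw->pagelist */
    discard_vrange_pagelist()    /* trylock + discard_vpage() each page */
    putback_lru_pages()          /* return anything not discarded to the LRU */

__discard_vrange_anon(), whose body is cut off at the end of this
message, is expected to call discard_vma_pages() for each vma covering
the range.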

Cc: Mel Gorman <m...@csn.ul.ie>
Cc: Hugh Dickins <hu...@google.com>
Cc: Dave Hansen <dave.han...@intel.com>
Cc: Rik van Riel <r...@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motoh...@gmail.com>
Cc: Michel Lespinasse <wal...@google.com>
Cc: Johannes Weiner <han...@cmpxchg.org>
Signed-off-by: John Stultz <john.stu...@linaro.org>
[jstultz: Code tweaks and commit log rewording]
Signed-off-by: Minchan Kim <minc...@kernel.org>
---
 mm/vrange.c |  184 ++-
 1 file changed, 183 insertions(+), 1 deletion(-)

diff --git a/mm/vrange.c b/mm/vrange.c
index 4a52b7a05f9a..0fa669c56ab8 100644
--- a/mm/vrange.c
+++ b/mm/vrange.c
@@ -11,6 +11,8 @@
 #include <linux/hugetlb.h>
 #include "internal.h"
 #include <linux/mmu_notifier.h>
+#include <linux/mm_inline.h>
+#include <linux/migrate.h>
 
 static struct kmem_cache *vrange_cachep;
 
@@ -19,6 +21,11 @@ static struct vrange_list {
spinlock_t lock;
 } vrange_list;
 
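+/*
+ * Walker state handed to walk_page_range() via mm_walk.private: the vma
+ * being scanned and the list that isolated pages are collected on.
+ */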
+struct vrange_walker {
+   struct vm_area_struct *vma;
+   struct list_head *pagelist;
+};
+
 static inline unsigned long vrange_size(struct vrange *range)
 {
return range->node.last + 1 - range->node.start;
@@ -682,11 +689,186 @@ static struct vrange *vrange_isolate(void)
return vrange;
 }
 
-static int discard_vrange(struct vrange *vrange, unsigned long *nr_discard)
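+/*
+ * Try to discard each isolated page on @page_list.  Pages that cannot be
+ * locked or that discard_vpage() rejects stay on the list for the caller
+ * to put back; discarded pages are freed here.  Returns the number of
+ * pages discarded.
+ */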
+static unsigned long discard_vrange_pagelist(struct list_head *page_list)
+{
+   struct page *page;
+   unsigned int nr_discard = 0;
+   LIST_HEAD(ret_pages);
+   LIST_HEAD(free_pages);
+
+   while (!list_empty(page_list)) {
+   int err;
+   page = list_entry(page_list->prev, struct page, lru);
+   list_del(&page->lru);
+   if (!trylock_page(page)) {
+   list_add(&page->lru, &ret_pages);
+   continue;
+   }
+
+   /*
+    * discard_vpage() returns with the page already
+    * unlocked if it was successful.
+    */
+   err = discard_vpage(page);
+   if (err) {
+   unlock_page(page);
+   list_add(&page->lru, &ret_pages);
+   continue;
+   }
+
+   ClearPageActive(page);
+   list_add(&page->lru, &free_pages);
+   dec_zone_page_state(page, NR_ISOLATED_ANON);
+   nr_discard++;
+   }
+
+   free_hot_cold_page_list(&free_pages, 1);
+   list_splice(&ret_pages, page_list);
+   return nr_discard;
+}
+
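+/*
+ * Per-pte step: isolate present, non-compound anon pages from the LRU
+ * onto vw->pagelist and account them as NR_ISOLATED_ANON.
+ */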
+static void vrange_pte_entry(pte_t pteval, unsigned long address,
+   unsigned ptent_size, struct mm_walk *walk)
 {
+   struct page *page;
+   struct vrange_walker *vw = walk->private;
+   struct vm_area_struct *vma = vw->vma;
+   struct list_head *pagelist = vw->pagelist;
+
+   if (pte_none(pteval))
+   return;
+
+   if (!pte_present(pteval))
+   return;
+
+   page = vm_normal_page(vma, address, pteval);
+   if (unlikely(!page))
+   return;
+
+   if (!PageLRU(page) || PageLocked(page))
+   return;
+
+   BUG_ON(PageCompound(page));
+
+   if (isolate_lru_page(page))
+   return;
+
+   list_add(&page->lru, pagelist);
+
+   VM_BUG_ON(page_is_file_cache(page));
+   inc_zone_page_state(page, NR_ISOLATED_ANON);
+}
+
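+/*
+ * pmd_entry callback for walk_page_range(): split any huge pmd, skip
+ * unstable pmds, then scan each pte under the page table lock.
+ */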
+static int vrange_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+   struct mm_walk *walk)
+{
+   struct vrange_walker *vw = walk->private;
+   struct vm_area_struct *uninitialized_var(vma);
+   pte_t *pte;
+   spinlock_t *ptl;
+
+   vma = vw->vma;
+   split_huge_page_pmd(vma, addr, pmd);
+   if (pmd_trans_unstable(pmd))
+   return 0;
+
+   pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+   for (; addr != end; pte++, addr += PAGE_SIZE)
+   vrange_pte_entry(*pte, addr, PAGE_SIZE, walk);
+   pte_unmap_unlock(pte - 1, ptl);
+   cond_resched();
+
return 0;
 }
 
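+/*
+ * Isolate and discard the pages @vma maps in [start, end); anything not
+ * discarded is put back on the LRU.  Returns the number of pages
+ * discarded.
+ */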
+static unsigned long discard_vma_pages(struct mm_struct *mm,
+   struct vm_area_struct *vma, unsigned long start,
+   unsigned long end)
+{
+   unsigned long ret = 0;
+   LIST_HEAD(pagelist);
+   struct vrange_walker vw;
+   struct mm_walk vrange_walk = {
+   .pmd_entry = vrange_pte_range,
+   .mm = vma->vm_mm,
+   .private = &vw,
+   };
+
+   vw.pagelist = &pagelist;
+   vw.vma = vma;
+
+   walk_page_range(start, end, &vrange_walk);
+
+   if (!list_empty(&pagelist))
+   ret = discard_vrange_pagelist(&pagelist);
+
+   putback_lru_pages(&pagelist);
+   return ret;
+}
+
+/*
+ * vrange->owner isn't stable because caller doesn't hold vrange_lock
+ * so avoid touching vrange->owner.
+ */
+static int __discard_vrange_anon(struct mm_struct *mm, struct vrange *vrange,
+   unsigned long *ret_discard)
+{
+   struct vm_area_struct *vma;
+   unsigned long nr_discard = 0;
+   unsigned long start = vrange->node.start;
+   unsigned long end = vrange->node.last + 1;
