[PATCH v10 10/16] vrange: Purging vrange-anon pages from shrinker
This patch provides the logic to discard anonymous vranges by
generating the page list for the volatile ranges setting the ptes
volatile, and discarding the pages.

Cc: Mel Gorman
Cc: Hugh Dickins
Cc: Dave Hansen
Cc: Rik van Riel
Cc: KOSAKI Motohiro
Cc: Michel Lespinasse
Cc: Johannes Weiner
Signed-off-by: John Stultz
[jstultz: Code tweaks and commit log rewording]
Signed-off-by: Minchan Kim
---
 mm/vrange.c | 184 ++-
 1 file changed, 183 insertions(+), 1 deletion(-)

diff --git a/mm/vrange.c b/mm/vrange.c
index 4a52b7a05f9a..0fa669c56ab8 100644
--- a/mm/vrange.c
+++ b/mm/vrange.c
@@ -11,6 +11,8 @@
 #include <linux/hugetlb.h>
 #include "internal.h"
 #include <linux/mmu_notifier.h>
+#include <linux/mm_inline.h>
+#include <linux/migrate.h>
 
 static struct kmem_cache *vrange_cachep;
 
@@ -19,6 +21,11 @@ static struct vrange_list {
 	spinlock_t lock;
 } vrange_list;
 
+struct vrange_walker {
+	struct vm_area_struct *vma;
+	struct list_head *pagelist;
+};
+
 static inline unsigned long vrange_size(struct vrange *range)
 {
 	return range->node.last + 1 - range->node.start;
@@ -682,11 +689,186 @@ static struct vrange *vrange_isolate(void)
 	return vrange;
 }
 
-static int discard_vrange(struct vrange *vrange, unsigned long *nr_discard)
+static unsigned long discard_vrange_pagelist(struct list_head *page_list)
+{
+	struct page *page;
+	unsigned int nr_discard = 0;
+	LIST_HEAD(ret_pages);
+	LIST_HEAD(free_pages);
+
+	while (!list_empty(page_list)) {
+		int err;
+		page = list_entry(page_list->prev, struct page, lru);
+		list_del(&page->lru);
+		if (!trylock_page(page)) {
+			list_add(&page->lru, &ret_pages);
+			continue;
+		}
+
+		/*
+		 * discard_vpage returns unlocked page if it
+		 * is successful
+		 */
+		err = discard_vpage(page);
+		if (err) {
+			unlock_page(page);
+			list_add(&page->lru, &ret_pages);
+			continue;
+		}
+
+		ClearPageActive(page);
+		list_add(&page->lru, &free_pages);
+		dec_zone_page_state(page, NR_ISOLATED_ANON);
+		nr_discard++;
+	}
+
+	free_hot_cold_page_list(&free_pages, 1);
+	list_splice(&ret_pages, page_list);
+	return nr_discard;
+}
+
+static void vrange_pte_entry(pte_t pteval, unsigned long address,
+		unsigned ptent_size, struct mm_walk *walk)
+{
+	struct page *page;
+	struct vrange_walker *vw = walk->private;
+	struct vm_area_struct *vma = vw->vma;
+	struct list_head *pagelist = vw->pagelist;
+
+	if (pte_none(pteval))
+		return;
+
+	if (!pte_present(pteval))
+		return;
+
+	page = vm_normal_page(vma, address, pteval);
+	if (unlikely(!page))
+		return;
+
+	if (!PageLRU(page) || PageLocked(page))
+		return;
+
+	BUG_ON(PageCompound(page));
+
+	if (isolate_lru_page(page))
+		return;
+
+	list_add(&page->lru, pagelist);
+
+	VM_BUG_ON(page_is_file_cache(page));
+	inc_zone_page_state(page, NR_ISOLATED_ANON);
+}
+
+static int vrange_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+		struct mm_walk *walk)
+{
+	struct vrange_walker *vw = walk->private;
+	struct vm_area_struct *uninitialized_var(vma);
+	pte_t *pte;
+	spinlock_t *ptl;
+
+	vma = vw->vma;
+	split_huge_page_pmd(vma, addr, pmd);
+	if (pmd_trans_unstable(pmd))
+		return 0;
+
+	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+	for (; addr != end; pte++, addr += PAGE_SIZE)
+		vrange_pte_entry(*pte, addr, PAGE_SIZE, walk);
+	pte_unmap_unlock(pte - 1, ptl);
+	cond_resched();
+	return 0;
+}
+
+static unsigned long discard_vma_pages(struct mm_struct *mm,
+		struct vm_area_struct *vma, unsigned long start,
+		unsigned long end)
+{
+	unsigned long ret = 0;
+	LIST_HEAD(pagelist);
+	struct vrange_walker vw;
+	struct mm_walk vrange_walk = {
+		.pmd_entry = vrange_pte_range,
+		.mm = vma->vm_mm,
+		.private = &vw,
+	};
+
+	vw.pagelist = &pagelist;
+	vw.vma = vma;
+
+	walk_page_range(start, end, &vrange_walk);
+
+	if (!list_empty(&pagelist))
+		ret = discard_vrange_pagelist(&pagelist);
+
+	putback_lru_pages(&pagelist);
+	return ret;
+}
+
+/*
+ * vrange->owner isn't stable because caller doesn't hold vrange_lock
+ * so avoid touching vrange->owner.
+ */
+static int __discard_vrange_anon(struct mm_struct *mm, struct vrange *vrange,
+		unsigned long *ret_discard)
+{
+	struct vm_area_struct *vma;
+	unsigned long nr_discard = 0;
+	unsigned long start = vrange->node.start;
+	unsigned long end =
[PATCH v10 10/16] vrange: Purging vrange-anon pages from shrinker
This patch provides the logic to discard anonymous vranges by
generating the page list for the volatile ranges setting the ptes
volatile, and discarding the pages.

Cc: Mel Gorman <m...@csn.ul.ie>
Cc: Hugh Dickins <hu...@google.com>
Cc: Dave Hansen <dave.han...@intel.com>
Cc: Rik van Riel <r...@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motoh...@gmail.com>
Cc: Michel Lespinasse <wal...@google.com>
Cc: Johannes Weiner <han...@cmpxchg.org>
Signed-off-by: John Stultz <john.stu...@linaro.org>
[jstultz: Code tweaks and commit log rewording]
Signed-off-by: Minchan Kim <minc...@kernel.org>
---
 mm/vrange.c | 184 ++-
 1 file changed, 183 insertions(+), 1 deletion(-)

diff --git a/mm/vrange.c b/mm/vrange.c
index 4a52b7a05f9a..0fa669c56ab8 100644
--- a/mm/vrange.c
+++ b/mm/vrange.c
@@ -11,6 +11,8 @@
 #include <linux/hugetlb.h>
 #include "internal.h"
 #include <linux/mmu_notifier.h>
+#include <linux/mm_inline.h>
+#include <linux/migrate.h>
 
 static struct kmem_cache *vrange_cachep;
 
@@ -19,6 +21,11 @@ static struct vrange_list {
 	spinlock_t lock;
 } vrange_list;
 
+struct vrange_walker {
+	struct vm_area_struct *vma;
+	struct list_head *pagelist;
+};
+
 static inline unsigned long vrange_size(struct vrange *range)
 {
 	return range->node.last + 1 - range->node.start;
@@ -682,11 +689,186 @@ static struct vrange *vrange_isolate(void)
 	return vrange;
 }
 
-static int discard_vrange(struct vrange *vrange, unsigned long *nr_discard)
+static unsigned long discard_vrange_pagelist(struct list_head *page_list)
+{
+	struct page *page;
+	unsigned int nr_discard = 0;
+	LIST_HEAD(ret_pages);
+	LIST_HEAD(free_pages);
+
+	while (!list_empty(page_list)) {
+		int err;
+		page = list_entry(page_list->prev, struct page, lru);
+		list_del(&page->lru);
+		if (!trylock_page(page)) {
+			list_add(&page->lru, &ret_pages);
+			continue;
+		}
+
+		/*
+		 * discard_vpage returns unlocked page if it
+		 * is successful
+		 */
+		err = discard_vpage(page);
+		if (err) {
+			unlock_page(page);
+			list_add(&page->lru, &ret_pages);
+			continue;
+		}
+
+		ClearPageActive(page);
+		list_add(&page->lru, &free_pages);
+		dec_zone_page_state(page, NR_ISOLATED_ANON);
+		nr_discard++;
+	}
+
+	free_hot_cold_page_list(&free_pages, 1);
+	list_splice(&ret_pages, page_list);
+	return nr_discard;
+}
+
+static void vrange_pte_entry(pte_t pteval, unsigned long address,
+		unsigned ptent_size, struct mm_walk *walk)
+{
+	struct page *page;
+	struct vrange_walker *vw = walk->private;
+	struct vm_area_struct *vma = vw->vma;
+	struct list_head *pagelist = vw->pagelist;
+
+	if (pte_none(pteval))
+		return;
+
+	if (!pte_present(pteval))
+		return;
+
+	page = vm_normal_page(vma, address, pteval);
+	if (unlikely(!page))
+		return;
+
+	if (!PageLRU(page) || PageLocked(page))
+		return;
+
+	BUG_ON(PageCompound(page));
+
+	if (isolate_lru_page(page))
+		return;
+
+	list_add(&page->lru, pagelist);
+
+	VM_BUG_ON(page_is_file_cache(page));
+	inc_zone_page_state(page, NR_ISOLATED_ANON);
+}
+
+static int vrange_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+		struct mm_walk *walk)
+{
+	struct vrange_walker *vw = walk->private;
+	struct vm_area_struct *uninitialized_var(vma);
+	pte_t *pte;
+	spinlock_t *ptl;
+
+	vma = vw->vma;
+	split_huge_page_pmd(vma, addr, pmd);
+	if (pmd_trans_unstable(pmd))
+		return 0;
+
+	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+	for (; addr != end; pte++, addr += PAGE_SIZE)
+		vrange_pte_entry(*pte, addr, PAGE_SIZE, walk);
+	pte_unmap_unlock(pte - 1, ptl);
+	cond_resched();
+	return 0;
+}
+
+static unsigned long discard_vma_pages(struct mm_struct *mm,
+		struct vm_area_struct *vma, unsigned long start,
+		unsigned long end)
+{
+	unsigned long ret = 0;
+	LIST_HEAD(pagelist);
+	struct vrange_walker vw;
+	struct mm_walk vrange_walk = {
+		.pmd_entry = vrange_pte_range,
+		.mm = vma->vm_mm,
+		.private = &vw,
+	};
+
+	vw.pagelist = &pagelist;
+	vw.vma = vma;
+
+	walk_page_range(start, end, &vrange_walk);
+
+	if (!list_empty(&pagelist))
+		ret = discard_vrange_pagelist(&pagelist);
+
+	putback_lru_pages(&pagelist);
+	return ret;
+}
+
+/*
+ * vrange->owner isn't stable because caller doesn't hold vrange_lock
+ * so avoid touching