Instead of having the vrange trees hang directly off the mm_struct, use a vrange_root structure. This allows us to hang vrange_roots off the mm_struct for anonymous memory, as well as off address_space structures for file-backed memory.
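To make the shape of the change concrete, here is a condensed sketch of the new layout (drawn from the hunks below; the address_space hookup itself is left to a later patch):

	/*
	 * The interval tree root and its lock move out of mm_struct
	 * into a structure that any owner can embed:
	 */
	struct vrange_root {
		struct rb_root v_rb;	/* vrange rb tree */
		struct mutex v_lock;	/* Protect v_rb */
	};

	/*
	 * Each vrange now points back at its owning vrange_root rather
	 * than at an mm_struct, so when the root is known to be embedded
	 * in an mm_struct, the mm is recovered with container_of():
	 */
	static inline struct mm_struct *vrange_get_owner_mm(struct vrange *vrange)
	{
		return container_of(vrange->owner, struct mm_struct, vroot);
	}

Locking likewise moves to the vrange_root, so callers that used to do vrange_lock(mm) now do vrange_lock(&mm->vroot).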
Cc: linux...@kvack.org
Cc: Michael Kerrisk <mtk.manpa...@gmail.com>
Cc: Arun Sharma <asha...@fb.com>
Cc: Mel Gorman <m...@csn.ul.ie>
Cc: Hugh Dickins <hu...@google.com>
Cc: Dave Hansen <d...@sr71.net>
Cc: Rik van Riel <r...@redhat.com>
Cc: Neil Brown <ne...@suse.de>
Cc: Mike Hommey <m...@glandium.org>
Cc: Taras Glek <tg...@mozilla.com>
Cc: KOSAKI Motohiro <kosaki.motoh...@jp.fujitsu.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hir...@jp.fujitsu.com>
Cc: Jason Evans <j...@fb.com>
Cc: san...@google.com
Cc: Paul Turner <p...@google.com>
Cc: Johannes Weiner <han...@cmpxchg.org>
Cc: Michel Lespinasse <wal...@google.com>
Cc: Andrew Morton <a...@linux-foundation.org>
Cc: Minchan Kim <minc...@kernel.org>
Signed-off-by: John Stultz <john.stu...@linaro.org>
---
 fs/proc/task_mmu.c           |   10 +--
 include/linux/mm_types.h     |    4 +-
 include/linux/vrange.h       |   35 +++++-----
 include/linux/vrange_types.h |   21 ++++++
 kernel/fork.c                |    2 +-
 mm/vrange.c                  |  156 ++++++++++++++++++++++--------------
 6 files changed, 126 insertions(+), 102 deletions(-)
 create mode 100644 include/linux/vrange_types.h

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index df009f0..11f63d4 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -391,13 +391,13 @@ static void *v_start(struct seq_file *m, loff_t *pos)
 	if (!mm || IS_ERR(mm))
 		return mm;
 
-	vrange_lock(mm);
-	root = &mm->v_rb;
+	vrange_lock(&mm->vroot);
+	root = &mm->vroot.v_rb;
 
-	if (RB_EMPTY_ROOT(&mm->v_rb))
+	if (RB_EMPTY_ROOT(&mm->vroot.v_rb))
 		goto out;
 
-	next = rb_first(&mm->v_rb);
+	next = rb_first(&mm->vroot.v_rb);
 	range = vrange_entry(next);
 	while(n > 0 && range) {
 		n--;
@@ -432,7 +432,7 @@ static void v_stop(struct seq_file *m, void *v)
 	struct proc_vrange_private *priv = m->private;
 	if (priv->task) {
 		struct mm_struct *mm = priv->task->mm;
-		vrange_unlock(mm);
+		vrange_unlock(&mm->vroot);
 		mmput(mm);
 		put_task_struct(priv->task);
 	}
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 080bf74..2e02a6d 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -14,6 +14,7 @@
 #include <linux/uprobes.h>
 #include <linux/page-flags-layout.h>
 #include <linux/mutex.h>
+#include <linux/vrange_types.h>
 #include <asm/page.h>
 #include <asm/mmu.h>
@@ -353,8 +354,7 @@ struct mm_struct {
 
 #ifdef CONFIG_MMU
-	struct rb_root v_rb;		/* vrange rb tree */
-	struct mutex v_lock;		/* Protect v_rb */
+	struct vrange_root vroot;
 #endif
 
 	unsigned long hiwater_rss;	/* High-watermark of RSS usage */
 	unsigned long hiwater_vm;	/* High-water virtual memory usage */
diff --git a/include/linux/vrange.h b/include/linux/vrange.h
index 4bcec40..b9b219c 100644
--- a/include/linux/vrange.h
+++ b/include/linux/vrange.h
@@ -1,42 +1,39 @@
 #ifndef _LINUX_VRANGE_H
 #define _LINUX_VRANGE_H
 
-#include <linux/mutex.h>
-#include <linux/interval_tree.h>
+#include <linux/vrange_types.h>
 #include <linux/mm.h>
 
-struct vrange {
-	struct interval_tree_node node;
-	bool purged;
-	struct mm_struct *mm;
-	struct list_head lru;		/* protected by lru_lock */
-	atomic_t refcount;
-};
-
 #define vrange_entry(ptr) \
 	container_of(ptr, struct vrange, node.rb)
 
 #ifdef CONFIG_MMU
 
-struct mm_struct;
 static inline void mm_init_vrange(struct mm_struct *mm)
 {
-	mm->v_rb = RB_ROOT;
-	mutex_init(&mm->v_lock);
+	mm->vroot.v_rb = RB_ROOT;
+	mutex_init(&mm->vroot.v_lock);
+}
+
+static inline void vrange_lock(struct vrange_root *vroot)
+{
+	mutex_lock(&vroot->v_lock);
 }
 
-static inline void vrange_lock(struct mm_struct *mm)
+static inline void vrange_unlock(struct vrange_root *vroot)
 {
-	mutex_lock(&mm->v_lock);
+	mutex_unlock(&vroot->v_lock);
 }
 
-static inline void vrange_unlock(struct mm_struct *mm)
+static inline struct mm_struct *vrange_get_owner_mm(struct vrange *vrange)
 {
-	mutex_unlock(&mm->v_lock);
+
+	return container_of(vrange->owner, struct mm_struct, vroot);
 }
 
-extern void exit_vrange(struct mm_struct *mm);
+
 void vrange_init(void);
+extern void mm_exit_vrange(struct mm_struct *mm);
 int discard_vpage(struct page *page);
 bool vrange_address(struct mm_struct *mm, unsigned long start,
 			unsigned long end);
@@ -50,7 +47,7 @@ void lru_move_vrange_to_head(struct mm_struct *mm, unsigned long address);
 
 static inline void vrange_init(void) {};
 static inline void mm_init_vrange(struct mm_struct *mm) {};
-static inline void exit_vrange(struct mm_struct *mm);
+static inline void mm_exit_vrange(struct mm_struct *mm);
 static inline bool vrange_address(struct mm_struct *mm, unsigned long start,
 			unsigned long end) { return false; };
 
diff --git a/include/linux/vrange_types.h b/include/linux/vrange_types.h
new file mode 100644
index 0000000..bede336
--- /dev/null
+++ b/include/linux/vrange_types.h
@@ -0,0 +1,21 @@
+#ifndef _LINUX_VRANGE_TYPES_H
+#define _LINUX_VRANGE_TYPES_H
+
+#include <linux/mutex.h>
+#include <linux/interval_tree.h>
+
+struct vrange_root {
+	struct rb_root v_rb;		/* vrange rb tree */
+	struct mutex v_lock;		/* Protect v_rb */
+};
+
+
+struct vrange {
+	struct interval_tree_node node;
+	struct vrange_root *owner;
+	bool purged;
+	struct list_head lru;		/* protected by lru_lock */
+	atomic_t refcount;
+};
+#endif
+
diff --git a/kernel/fork.c b/kernel/fork.c
index e3aa120..f2da4a0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -614,7 +614,7 @@ void mmput(struct mm_struct *mm)
 
 	if (atomic_dec_and_test(&mm->mm_users)) {
 		uprobe_clear_state(mm);
-		exit_vrange(mm);
+		mm_exit_vrange(mm);
 		exit_aio(mm);
 		ksm_exit(mm);
 		khugepaged_exit(mm); /* must run before exit_mmap */
diff --git a/mm/vrange.c b/mm/vrange.c
index d07884d..9facbbc 100644
--- a/mm/vrange.c
+++ b/mm/vrange.c
@@ -39,10 +39,12 @@ void __init vrange_init(void)
 }
 
 static inline void __set_vrange(struct vrange *range,
-			unsigned long start_idx, unsigned long end_idx)
+			unsigned long start_idx, unsigned long end_idx,
+			bool purged)
 {
 	range->node.start = start_idx;
 	range->node.last = end_idx;
+	range->purged = purged;
 }
 
 static void lru_add_vrange(struct vrange *vrange)
@@ -63,12 +65,13 @@ static void lru_remove_vrange(struct vrange *vrange)
 
 void lru_move_vrange_to_head(struct mm_struct *mm, unsigned long address)
 {
-	struct rb_root *root = &mm->v_rb;
+	struct vrange_root *vroot = &mm->vroot;
 	struct interval_tree_node *node;
 	struct vrange *vrange;
 
-	vrange_lock(mm);
-	node = interval_tree_iter_first(root, address, address + PAGE_SIZE - 1);
+	vrange_lock(vroot);
+	node = interval_tree_iter_first(&vroot->v_rb, address,
+					address + PAGE_SIZE - 1);
 	if (node) {
 		vrange = container_of(node, struct vrange, node);
 		spin_lock(&lru_lock);
@@ -81,22 +84,21 @@ void lru_move_vrange_to_head(struct mm_struct *mm, unsigned long address)
 		list_move(&vrange->lru, &lru_vrange);
 		spin_unlock(&lru_lock);
 	}
-	vrange_unlock(mm);
+	vrange_unlock(vroot);
 }
 
-static void __add_range(struct vrange *range,
-			struct rb_root *root, struct mm_struct *mm)
+static void __add_range(struct vrange *range, struct vrange_root *vroot)
 {
-	range->mm = mm;
+	range->owner = vroot;
 	lru_add_vrange(range);
-	interval_tree_insert(&range->node, root);
+	interval_tree_insert(&range->node, &vroot->v_rb);
 }
 
 /* remove range from interval tree */
-static void __remove_range(struct vrange *range,
-			struct rb_root *root)
+static void __remove_range(struct vrange *range)
 {
-	interval_tree_remove(&range->node, root);
+	interval_tree_remove(&range->node, &range->owner->v_rb);
+	range->owner = NULL;
 }
 
 static struct vrange *alloc_vrange(void)
@@ -104,11 +106,13 @@ static struct vrange *alloc_vrange(void)
 	struct vrange *vrange = kmem_cache_alloc(vrange_cachep, GFP_KERNEL);
 	if (vrange)
 		atomic_set(&vrange->refcount, 1);
+	vrange->owner = NULL;
 	return vrange;
 }
 
 static void free_vrange(struct vrange *range)
 {
+	WARN_ON(range->owner);
 	lru_remove_vrange(range);
 	kmem_cache_free(vrange_cachep, range);
 }
@@ -120,20 +124,20 @@ static void put_vrange(struct vrange *range)
 		free_vrange(range);
 }
 
-static inline void range_resize(struct rb_root *root,
-				struct vrange *range,
-				unsigned long start, unsigned long end,
-				struct mm_struct *mm)
+static inline void range_resize(struct vrange *range,
+				unsigned long start, unsigned long end)
 {
-	__remove_range(range, root);
-	__set_vrange(range, start, end);
-	__add_range(range, root, mm);
+	struct vrange_root *vroot = range->owner;
+	bool purged = range->purged;
+
+	__remove_range(range);
+	__set_vrange(range, start, end, purged);
+	__add_range(range, vroot);
 }
 
-static int add_vrange(struct mm_struct *mm,
+static int add_vrange(struct vrange_root *vroot,
 			unsigned long start, unsigned long end)
 {
-	struct rb_root *root;
 	struct vrange *new_range, *range;
 	struct interval_tree_node *node, *next;
 	int purged = 0;
@@ -142,9 +146,8 @@ static int add_vrange(struct mm_struct *mm,
 	if (!new_range)
 		return -ENOMEM;
 
-	root = &mm->v_rb;
-	vrange_lock(mm);
-	node = interval_tree_iter_first(root, start, end);
+	vrange_lock(vroot);
+	node = interval_tree_iter_first(&vroot->v_rb, start, end);
 
 	while (node) {
 		next = interval_tree_iter_next(node, start, end);
@@ -158,24 +161,22 @@ static int add_vrange(struct mm_struct *mm,
 		end = max_t(unsigned long, end, node->last);
 		purged |= range->purged;
 
-		__remove_range(range, root);
+		__remove_range(range);
 		put_vrange(range);
 		node = next;
 	}
 
-	__set_vrange(new_range, start, end);
-	new_range->purged = purged;
-	__add_range(new_range, root, mm);
+	__set_vrange(new_range, start, end, purged);
+	__add_range(new_range, vroot);
 out:
-	vrange_unlock(mm);
+	vrange_unlock(vroot);
 	return 0;
 }
 
-static int remove_vrange(struct mm_struct *mm,
+static int remove_vrange(struct vrange_root *vroot,
 			unsigned long start, unsigned long end)
 {
-	struct rb_root *root;
 	struct vrange *new_range, *range;
 	struct interval_tree_node *node, *next;
 	int ret = 0;
 
@@ -185,10 +186,9 @@ static int remove_vrange(struct mm_struct *mm,
 	if (!new_range)
 		return -ENOMEM;
 
-	root = &mm->v_rb;
-	vrange_lock(mm);
+	vrange_lock(vroot);
 
-	node = interval_tree_iter_first(root, start, end);
+	node = interval_tree_iter_first(&vroot->v_rb, start, end);
 	while (node) {
 		next = interval_tree_iter_next(node, start, end);
 
@@ -196,42 +196,40 @@ static int remove_vrange(struct mm_struct *mm,
 		ret |= range->purged;
 
 		if (start <= node->start && end >= node->last) {
-			__remove_range(range, root);
+			__remove_range(range);
 			put_vrange(range);
 		} else if (node->start >= start) {
-			range_resize(root, range, end, node->last, mm);
+			range_resize(range, end, node->last);
 		} else if (node->last <= end) {
-			range_resize(root, range, node->start, start, mm);
+			range_resize(range, node->start, start);
 		} else {
 			used_new = true;
-			__set_vrange(new_range, end, node->last);
-			new_range->purged = range->purged;
-			new_range->mm = mm;
-			range_resize(root, range, node->start, start, mm);
-			__add_range(new_range, root, mm);
+			__set_vrange(new_range, end, node->last, range->purged);
+			range_resize(range, node->start, start);
+			__add_range(new_range, vroot);
 			break;
 		}
 
 		node = next;
 	}
-	vrange_unlock(mm);
+	vrange_unlock(vroot);
 
 	if (!used_new)
 		put_vrange(new_range);
 
 	return ret;
 }
 
-void exit_vrange(struct mm_struct *mm)
+void mm_exit_vrange(struct mm_struct *mm)
 {
 	struct vrange *range;
 	struct rb_node *next;
 
-	next = rb_first(&mm->v_rb);
+	next = rb_first(&mm->vroot.v_rb);
 	while (next) {
 		range = vrange_entry(next);
 		next = rb_next(next);
-		__remove_range(range, &mm->v_rb);
+		__remove_range(range);
 		put_vrange(range);
 	}
 }
@@ -285,17 +283,18 @@ SYSCALL_DEFINE4(vrange, unsigned long, start,
 		goto out;
 
 	if (mode == VRANGE_VOLATILE)
-		ret = add_vrange(mm, start, end - 1);
+		ret = add_vrange(&mm->vroot, start, end - 1);
 	else if (mode == VRANGE_NOVOLATILE)
-		ret = remove_vrange(mm, start, end - 1);
+		ret = remove_vrange(&mm->vroot, start, end - 1);
out:
 	return ret;
 }
 
+
 static bool __vrange_address(struct mm_struct *mm,
 			unsigned long start, unsigned long end)
 {
-	struct rb_root *root = &mm->v_rb;
+	struct rb_root *root = &mm->vroot.v_rb;
 	struct interval_tree_node *node;
 
 	node = interval_tree_iter_first(root, start, end);
@@ -306,10 +305,11 @@ bool vrange_address(struct mm_struct *mm,
 			unsigned long start, unsigned long end)
 {
 	bool ret;
+	struct vrange_root *vroot = &mm->vroot;
 
-	vrange_lock(mm);
+	vrange_lock(vroot);
 	ret = __vrange_address(mm, start, end);
-	vrange_unlock(mm);
+	vrange_unlock(vroot);
 
 	return ret;
 }
@@ -372,14 +372,13 @@ static inline pte_t *vpage_check_address(struct page *page,
 	return ptep;
 }
 
-static void __vrange_purge(struct mm_struct *mm,
+static void __vrange_purge(struct vrange_root *vroot,
 			unsigned long start, unsigned long end)
 {
-	struct rb_root *root = &mm->v_rb;
-	struct vrange *range;
 	struct interval_tree_node *node;
+	struct vrange *range;
 
-	node = interval_tree_iter_first(root, start, end);
+	node = interval_tree_iter_first(&vroot->v_rb, start, end);
 	while (node) {
 		range = container_of(node, struct vrange, node);
 		range->purged = true;
@@ -396,20 +395,19 @@ static int try_to_discard_one(struct page *page, struct vm_area_struct *vma,
 	spinlock_t *ptl;
 	int ret = 0;
 	bool present;
+	struct vrange_root *vroot = &mm->vroot;
 
 	VM_BUG_ON(!PageLocked(page));
 
-	vrange_lock(mm);
+	vrange_lock(vroot);
 	pte = vpage_check_address(page, mm, address, &ptl);
 	if (!pte) {
-		vrange_unlock(mm);
 		goto out;
 	}
 
 	if (vma->vm_flags & VM_LOCKED) {
 		pte_unmap_unlock(pte, ptl);
-		vrange_unlock(mm);
-		return 0;
+		goto out;
 	}
 
 	present = pte_present(*pte);
@@ -431,12 +429,13 @@ static int try_to_discard_one(struct page *page, struct vm_area_struct *vma,
 	}
 
 	set_pte_at(mm, address, pte, pteval);
-	__vrange_purge(mm, address, address + PAGE_SIZE -1);
+	__vrange_purge(&mm->vroot, address, address + PAGE_SIZE - 1);
 	pte_unmap_unlock(pte, ptl);
 	mmu_notifier_invalidate_page(mm, address);
-	vrange_unlock(mm);
 	ret = 1;
+
out:
+	vrange_unlock(vroot);
 	return ret;
 }
 
@@ -458,12 +457,14 @@ static int try_to_discard_vpage(struct page *page)
 	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
 		pte_t *pte;
 		spinlock_t *ptl;
+		struct vrange_root *vroot;
 
 		vma = avc->vma;
 		mm = vma->vm_mm;
+		vroot = &mm->vroot;
 		address = vma_address(page, vma);
 
-		vrange_lock(mm);
+		vrange_lock(vroot);
 		/*
 		 * We can't use page_check_address because it doesn't check
 		 * swap entry of the page table. We need the check because
@@ -473,24 +474,24 @@ static int try_to_discard_vpage(struct page *page)
 		 */
 		pte = vpage_check_address(page, mm, address, &ptl);
 		if (!pte) {
-			vrange_unlock(mm);
+			vrange_unlock(vroot);
 			continue;
 		}
 
 		if (vma->vm_flags & VM_LOCKED) {
 			pte_unmap_unlock(pte, ptl);
-			vrange_unlock(mm);
+			vrange_unlock(vroot);
 			goto out;
 		}
 
 		pte_unmap_unlock(pte, ptl);
 
 		if (!__vrange_address(mm, address, address + PAGE_SIZE - 1)) {
-			vrange_unlock(mm);
+			vrange_unlock(vroot);
 			goto out;
 		}
 
-		vrange_unlock(mm);
+		vrange_unlock(vroot);
 	}
 
 	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
@@ -531,19 +532,20 @@ int discard_vpage(struct page *page)
 
 bool is_purged_vrange(struct mm_struct *mm, unsigned long address)
 {
-	struct rb_root *root = &mm->v_rb;
+	struct vrange_root *vroot = &mm->vroot;
 	struct interval_tree_node *node;
 	struct vrange *range;
 	bool ret = false;
 
-	vrange_lock(mm);
-	node = interval_tree_iter_first(root, address, address + PAGE_SIZE - 1);
+	vrange_lock(vroot);
+	node = interval_tree_iter_first(&vroot->v_rb, address,
+					address + PAGE_SIZE - 1);
 	if (node) {
 		range = container_of(node, struct vrange, node);
 		if (range->purged)
 			ret = true;
 	}
-	vrange_unlock(mm);
+	vrange_unlock(vroot);
 
 	return ret;
 }
@@ -631,12 +633,14 @@ static unsigned int discard_vma_pages(struct zone *zone, struct mm_struct *mm,
 
 unsigned int discard_vrange(struct zone *zone, struct vrange *vrange,
 				int nr_to_discard)
 {
-	struct mm_struct *mm = vrange->mm;
+	struct mm_struct *mm;
 	unsigned long start = vrange->node.start;
 	unsigned long end = vrange->node.last;
 	struct vm_area_struct *vma;
 	unsigned int nr_discarded = 0;
 
+	mm = vrange_get_owner_mm(vrange);
+
 	if (!down_read_trylock(&mm->mmap_sem))
 		goto out;
@@ -678,7 +682,7 @@ static struct vrange *get_victim_vrange(void)
 	spin_lock(&lru_lock);
 	list_for_each_prev_safe(cur, tmp, &lru_vrange) {
 		vrange = list_entry(cur, struct vrange, lru);
-		mm = vrange->mm;
+		mm = vrange_get_owner_mm(vrange);
 		/* the process is exiting so pass it */
 		if (atomic_read(&mm->mm_users) == 0) {
 			list_del_init(&vrange->lru);
@@ -698,7 +702,7 @@ static struct vrange *get_victim_vrange(void)
 		 * need to get a refcount of mm.
 		 * NOTE: We guarantee mm_count isn't zero in here because
 		 * if we found vrange from LRU list, it means we are
-		 * before exit_vrange or remove_vrange.
+		 * before mm_exit_vrange or remove_vrange.
 		 */
 		atomic_inc(&mm->mm_count);
 
@@ -713,8 +717,10 @@ static struct vrange *get_victim_vrange(void)
 
 static void put_victim_range(struct vrange *vrange)
 {
+	struct mm_struct *mm = vrange_get_owner_mm(vrange);
+
 	put_vrange(vrange);
-	mmdrop(vrange->mm);
+	mmdrop(mm);
 }
 
 unsigned int discard_vrange_pages(struct zone *zone, int nr_to_discard)
@@ -724,7 +730,7 @@ unsigned int discard_vrange_pages(struct zone *zone, int nr_to_discard)
 
 	start_vrange = vrange = get_victim_vrange();
 	if (start_vrange) {
-		struct mm_struct *mm = start_vrange->mm;
+		struct mm_struct *mm = vrange_get_owner_mm(vrange);
 		atomic_inc(&start_vrange->refcount);
 		atomic_inc(&mm->mm_count);
 	}
-- 
1.7.10.4