The commit is pushed to "branch-rh7-3.10.0-327.36.1.vz7.18.x-ovz" and will 
appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-327.36.1.vz7.18.3
------>
commit de430b3b071cfbeb42f515a1e869a86680a0bf86
Author: Kirill A. Shutemov <kirill.shute...@linux.intel.com>
Date:   Mon Oct 3 14:40:31 2016 +0000

    ms/rmap: drop support of non-linear mappings
    
    We don't create non-linear mappings anymore.  Let's drop code which
    handles them in rmap.
    
    Signed-off-by: Kirill A. Shutemov <kirill.shute...@linux.intel.com>
    Signed-off-by: Andrew Morton <a...@linux-foundation.org>
    Signed-off-by: Linus Torvalds <torva...@linux-foundation.org>
    
    https://jira.sw.ru/browse/PSBM-52992
    
    (cherry picked from commit 27ba0644ea9dfe6e7693abc85837b60e40583b96)
    Signed-off-by: Andrey Ryabinin <aryabi...@virtuozzo.com>
    Reviewed-by: Cyrill Gorcunov <gorcu...@openvz.org>
---
 Documentation/cachetlb.txt |   8 +-
 fs/inode.c                 |   1 -
 include/linux/fs.h         |   4 +-
 include/linux/mm.h         |   6 --
 include/linux/mm_types.h   |   4 +-
 include/linux/rmap.h       |   3 -
 kernel/fork.c              |   8 +-
 mm/memory.c                |   5 --
 mm/mmap.c                  |  24 ++---
 mm/rmap.c                  | 214 +--------------------------------------------
 mm/swap.c                  |   4 +-
 11 files changed, 18 insertions(+), 263 deletions(-)

diff --git a/Documentation/cachetlb.txt b/Documentation/cachetlb.txt
index 2431096..cab2a07 100644
--- a/Documentation/cachetlb.txt
+++ b/Documentation/cachetlb.txt
@@ -318,10 +318,10 @@ maps this page at its virtual address.
        about doing this.
 
        The idea is, first at flush_dcache_page() time, if
-       page->mapping->i_mmap is an empty tree and ->i_mmap_nonlinear
-       an empty list, just mark the architecture private page flag bit.
-       Later, in update_mmu_cache(), a check is made of this flag bit,
-       and if set the flush is done and the flag bit is cleared.
+       page->mapping->i_mmap is an empty tree, just mark the architecture
+       private page flag bit.  Later, in update_mmu_cache(), a check is
+       made of this flag bit, and if set the flush is done and the flag
+       bit is cleared.
 
        IMPORTANT NOTE: It is often important, if you defer the flush,
                        that the actual flush occurs on the same CPU
diff --git a/fs/inode.c b/fs/inode.c
index 71dc454..6404cc5 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -367,7 +367,6 @@ void address_space_init_once(struct address_space *mapping)
        INIT_LIST_HEAD(&mapping->private_list);
        spin_lock_init(&mapping->private_lock);
        mapping->i_mmap = RB_ROOT;
-       INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
        INIT_LIST_HEAD(&mapping->i_peer_list);
 }
 EXPORT_SYMBOL(address_space_init_once);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 408bb24..41e13f1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -559,7 +559,6 @@ struct address_space {
        spinlock_t              tree_lock;      /* and lock protecting it */
        atomic_t                i_mmap_writable;/* count VM_SHARED mappings */
        struct rb_root          i_mmap;         /* tree of private and shared mappings */
-       struct list_head        i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
        struct mutex            i_mmap_mutex;   /* protect tree, count, list */
        /* Protected by tree_lock together with the radix tree */
        unsigned long           nrpages;        /* number of total pages */
@@ -634,8 +633,7 @@ int mapping_tagged(struct address_space *mapping, int tag);
  */
 static inline int mapping_mapped(struct address_space *mapping)
 {
-       return  !RB_EMPTY_ROOT(&mapping->i_mmap) ||
-               !list_empty(&mapping->i_mmap_nonlinear);
+       return  !RB_EMPTY_ROOT(&mapping->i_mmap);
 }
 
 /*
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4ad0b19..d0392e5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1689,12 +1689,6 @@ struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node,
        for (vma = vma_interval_tree_iter_first(root, start, last);     \
             vma; vma = vma_interval_tree_iter_next(vma, start, last))
 
-static inline void vma_nonlinear_insert(struct vm_area_struct *vma,
-                                       struct list_head *list)
-{
-       list_add_tail(&vma->shared.nonlinear, list);
-}
-
 void anon_vma_interval_tree_insert(struct anon_vma_chain *node,
                                   struct rb_root *root);
 void anon_vma_interval_tree_remove(struct anon_vma_chain *node,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 7a033ec..97c537a 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -277,15 +277,13 @@ struct vm_area_struct {
 
        /*
         * For areas with an address space and backing store,
-        * linkage into the address_space->i_mmap interval tree, or
-        * linkage of vma in the address_space->i_mmap_nonlinear list.
+        * linkage into the address_space->i_mmap interval tree.
         */
        union {
                struct {
                        struct rb_node rb;
                        unsigned long rb_subtree_last;
                } linear;
-               struct list_head nonlinear;
        } shared;
 
        /*
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 2491eae..10869e7 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -243,7 +243,6 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
  * arg: passed to rmap_one() and invalid_vma()
  * rmap_one: executed on each vma where page is mapped
  * done: for checking traversing termination condition
- * file_nonlinear: for handling file nonlinear mapping
  * anon_lock: for getting anon_lock by optimized way rather than default
  * invalid_vma: for skipping uninterested vma
  */
@@ -252,8 +251,6 @@ struct rmap_walk_control {
        int (*rmap_one)(struct page *page, struct vm_area_struct *vma,
                                        unsigned long addr, void *arg);
        int (*done)(struct page *page);
-       int (*file_nonlinear)(struct page *, struct address_space *,
-                                       struct vm_area_struct *vma);
        struct anon_vma *(*anon_lock)(struct page *page);
        bool (*invalid_vma)(struct vm_area_struct *vma, void *arg);
 };
diff --git a/kernel/fork.c b/kernel/fork.c
index 2fcde98..1e7b897 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -439,12 +439,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
                                atomic_inc(&mapping->i_mmap_writable);
                        flush_dcache_mmap_lock(mapping);
                        /* insert tmp into the share list, just after mpnt */
-                       if (unlikely(tmp->vm_flags & VM_NONLINEAR))
-                               vma_nonlinear_insert(tmp,
-                                               &mapping->i_mmap_nonlinear);
-                       else
-                               vma_interval_tree_insert_after(tmp, mpnt,
-                                                       &mapping->i_mmap);
+                       vma_interval_tree_insert_after(tmp, mpnt,
+                                       &mapping->i_mmap);
                        flush_dcache_mmap_unlock(mapping);
                        mutex_unlock(&mapping->i_mmap_mutex);
                }
diff --git a/mm/memory.c b/mm/memory.c
index e93aa91..8a55890 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4546,13 +4546,8 @@ restart:
        vma_interval_tree_foreach(vma, &mapping->i_mmap, 0, ULONG_MAX)
                if (synchronize_mapping_faults_vma(mapping, vma))
                        goto restart;
-       list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.nonlinear)
-               if (synchronize_mapping_faults_vma(mapping, vma))
-                       goto restart;
        vma_interval_tree_foreach(vma, &mapping->i_mmap, 0, ULONG_MAX)
                vma->vm_private_data = NULL;
-       list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.nonlinear)
-               vma->vm_private_data = NULL;
 }
 
 void close_mapping_peer(struct address_space *mapping)
diff --git a/mm/mmap.c b/mm/mmap.c
index eeebddc..207a8f9 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -223,10 +223,7 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma,
                mapping_unmap_writable(mapping);
 
        flush_dcache_mmap_lock(mapping);
-       if (unlikely(vma->vm_flags & VM_NONLINEAR))
-               list_del_init(&vma->shared.nonlinear);
-       else
-               vma_interval_tree_remove(vma, &mapping->i_mmap);
+       vma_interval_tree_remove(vma, &mapping->i_mmap);
        flush_dcache_mmap_unlock(mapping);
 }
 
@@ -626,10 +623,7 @@ static void __vma_link_file(struct vm_area_struct *vma)
                        atomic_inc(&mapping->i_mmap_writable);
 
                flush_dcache_mmap_lock(mapping);
-               if (unlikely(vma->vm_flags & VM_NONLINEAR))
-                       vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
-               else
-                       vma_interval_tree_insert(vma, &mapping->i_mmap);
+               vma_interval_tree_insert(vma, &mapping->i_mmap);
                flush_dcache_mmap_unlock(mapping);
        }
 }
@@ -761,14 +755,11 @@ again:                    remove_next = 1 + (end > next->vm_end);
 
        if (file) {
                mapping = file->f_mapping;
-               if (!(vma->vm_flags & VM_NONLINEAR)) {
-                       root = &mapping->i_mmap;
-                       uprobe_munmap(vma, vma->vm_start, vma->vm_end);
+               root = &mapping->i_mmap;
+               uprobe_munmap(vma, vma->vm_start, vma->vm_end);
 
-                       if (adjust_next)
-                               uprobe_munmap(next, next->vm_start,
-                                                       next->vm_end);
-               }
+               if (adjust_next)
+                       uprobe_munmap(next, next->vm_start, next->vm_end);
 
                mutex_lock(&mapping->i_mmap_mutex);
                if (insert) {
@@ -3197,8 +3188,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
  *
  * mmap_sem in write mode is required in order to block all operations
  * that could modify pagetables and free pages without need of
- * altering the vma layout (for example populate_range() with
- * nonlinear vmas). It's also needed in write mode to avoid new
+ * altering the vma layout. It's also needed in write mode to avoid new
  * anon_vmas to be associated with existing vmas.
  *
  * A single task can't take more than one mm_take_all_locks() in a row
diff --git a/mm/rmap.c b/mm/rmap.c
index f18fdbb..c778134 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -553,9 +553,8 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
                if (!vma->anon_vma || !page__anon_vma ||
                    vma->anon_vma->root != page__anon_vma->root)
                        return -EFAULT;
-       } else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) {
-               if (!vma->vm_file ||
-                   vma->vm_file->f_mapping != page->mapping)
+       } else if (page->mapping) {
+               if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping)
                        return -EFAULT;
        } else
                return -EFAULT;
@@ -1302,7 +1301,6 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                if (pte_soft_dirty(pteval))
                        swp_pte = pte_swp_mksoft_dirty(swp_pte);
                set_pte_at(mm, address, pte, swp_pte);
-               BUG_ON(pte_file(*pte));
        } else if (IS_ENABLED(CONFIG_MIGRATION) &&
                   (TTU_ACTION(flags) == TTU_MIGRATION)) {
                /* Establish migration entry for a file page */
@@ -1344,133 +1342,6 @@ out_mlock:
        return ret;
 }
 
-/*
- * objrmap doesn't work for nonlinear VMAs because the assumption that
- * offset-into-file correlates with offset-into-virtual-addresses does not hold.
- * Consequently, given a particular page and its ->index, we cannot locate the
- * ptes which are mapping that page without an exhaustive linear search.
- *
- * So what this code does is a mini "virtual scan" of each nonlinear VMA which
- * maps the file to which the target page belongs.  The ->vm_private_data field
- * holds the current cursor into that scan.  Successive searches will circulate
- * around the vma's virtual address space.
- *
- * So as more replacement pressure is applied to the pages in a nonlinear VMA,
- * more scanning pressure is placed against them as well.   Eventually pages
- * will become fully unmapped and are eligible for eviction.
- *
- * For very sparsely populated VMAs this is a little inefficient - chances are
- * there there won't be many ptes located within the scan cluster.  In this case
- * maybe we could scan further - to the end of the pte page, perhaps.
- *
- * Mlocked pages:  check VM_LOCKED under mmap_sem held for read, if we can
- * acquire it without blocking.  If vma locked, mlock the pages in the cluster,
- * rather than unmapping them.  If we encounter the "check_page" that vmscan is
- * trying to unmap, return SWAP_MLOCK, else default SWAP_AGAIN.
- */
-#define CLUSTER_SIZE   min(32*PAGE_SIZE, PMD_SIZE)
-#define CLUSTER_MASK   (~(CLUSTER_SIZE - 1))
-
-static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
-               struct vm_area_struct *vma, struct page *check_page)
-{
-       struct mm_struct *mm = vma->vm_mm;
-       pmd_t *pmd;
-       pte_t *pte;
-       pte_t pteval;
-       spinlock_t *ptl;
-       struct page *page;
-       unsigned long address;
-       unsigned long mmun_start;       /* For mmu_notifiers */
-       unsigned long mmun_end;         /* For mmu_notifiers */
-       unsigned long end;
-       int ret = SWAP_AGAIN;
-       int locked_vma = 0;
-
-       address = (vma->vm_start + cursor) & CLUSTER_MASK;
-       end = address + CLUSTER_SIZE;
-       if (address < vma->vm_start)
-               address = vma->vm_start;
-       if (end > vma->vm_end)
-               end = vma->vm_end;
-
-       pmd = mm_find_pmd(mm, address);
-       if (!pmd)
-               return ret;
-
-       mmun_start = address;
-       mmun_end   = end;
-       mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
-
-       /*
-        * If we can acquire the mmap_sem for read, and vma is VM_LOCKED,
-        * keep the sem while scanning the cluster for mlocking pages.
-        */
-       if (down_read_trylock(&vma->vm_mm->mmap_sem)) {
-               locked_vma = (vma->vm_flags & VM_LOCKED);
-               if (!locked_vma)
-                       up_read(&vma->vm_mm->mmap_sem); /* don't need it */
-       }
-
-       pte = pte_offset_map_lock(mm, pmd, address, &ptl);
-
-       /* Update high watermark before we lower rss */
-       update_hiwater_rss(mm);
-
-       for (; address < end; pte++, address += PAGE_SIZE) {
-               if (!pte_present(*pte))
-                       continue;
-               page = vm_normal_page(vma, address, *pte);
-               BUG_ON(!page || PageAnon(page));
-
-               if (locked_vma) {
-                       if (page == check_page) {
-                               /* we know we have check_page locked */
-                               mlock_vma_page(page);
-                               ret = SWAP_MLOCK;
-                       } else if (trylock_page(page)) {
-                               /*
-                                * If we can lock the page, perform mlock.
-                                * Otherwise leave the page alone, it will be
-                                * eventually encountered again later.
-                                */
-                               mlock_vma_page(page);
-                               unlock_page(page);
-                       }
-                       continue;       /* don't unmap */
-               }
-
-               if (ptep_clear_flush_young_notify(vma, address, pte))
-                       continue;
-
-               /* Nuke the page table entry. */
-               flush_cache_page(vma, address, pte_pfn(*pte));
-               pteval = ptep_clear_flush_notify(vma, address, pte);
-
-               /* If nonlinear, store the file page offset in the pte. */
-               if (page->index != linear_page_index(vma, address)) {
-                       pte_t ptfile = pgoff_to_pte(page->index);
-                       if (pte_soft_dirty(pteval))
-                               ptfile = pte_file_mksoft_dirty(ptfile);
-                       set_pte_at(mm, address, pte, ptfile);
-               }
-
-               /* Move the dirty bit to the physical page now the pte is gone. */
-               if (pte_dirty(pteval))
-                       set_page_dirty_mm(page, mm);
-
-               page_remove_rmap(page);
-               page_cache_release(page);
-               dec_mm_counter(mm, MM_FILEPAGES);
-               (*mapcount)--;
-       }
-       pte_unmap_unlock(pte - 1, ptl);
-       mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
-       if (locked_vma)
-               up_read(&vma->vm_mm->mmap_sem);
-       return ret;
-}
-
 bool is_vma_temporary_stack(struct vm_area_struct *vma)
 {
        int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP);
@@ -1560,10 +1431,6 @@ static int try_to_unmap_mapping(struct page *page,
        pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
        struct vm_area_struct *vma;
        int ret = SWAP_AGAIN;
-       unsigned long cursor;
-       unsigned long max_nl_cursor = 0;
-       unsigned long max_nl_size = 0;
-       unsigned int mapcount;
 
        if (PageHuge(page))
                pgoff = page->index << compound_order(page);
@@ -1575,75 +1442,6 @@ static int try_to_unmap_mapping(struct page *page,
                        goto out;
        }
 
-       if (list_empty(&mapping->i_mmap_nonlinear))
-               goto out;
-
-       /*
-        * We don't bother to try to find the munlocked page in nonlinears.
-        * It's costly. Instead, later, page reclaim logic may call
-        * try_to_unmap(TTU_MUNLOCK) and recover PG_mlocked lazily.
-        */
-       if (TTU_ACTION(flags) == TTU_MUNLOCK)
-               goto out;
-
-       list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
-                                                       shared.nonlinear) {
-               cursor = (unsigned long) vma->vm_private_data;
-               if (cursor > max_nl_cursor)
-                       max_nl_cursor = cursor;
-               cursor = vma->vm_end - vma->vm_start;
-               if (cursor > max_nl_size)
-                       max_nl_size = cursor;
-       }
-
-       if (max_nl_size == 0) { /* all nonlinears locked or reserved ? */
-               ret = SWAP_FAIL;
-               goto out;
-       }
-
-       /*
-        * We don't try to search for this page in the nonlinear vmas,
-        * and page_referenced wouldn't have found it anyway.  Instead
-        * just walk the nonlinear vmas trying to age and unmap some.
-        * The mapcount of the page we came in with is irrelevant,
-        * but even so use it as a guide to how hard we should try?
-        */
-       mapcount = page_mapcount(page);
-       if (!mapcount)
-               goto out;
-       cond_resched();
-
-       max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK;
-       if (max_nl_cursor == 0)
-               max_nl_cursor = CLUSTER_SIZE;
-
-       do {
-               list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
-                                                       shared.nonlinear) {
-                       cursor = (unsigned long) vma->vm_private_data;
-                       while ( cursor < max_nl_cursor &&
-                               cursor < vma->vm_end - vma->vm_start) {
-                               if (try_to_unmap_cluster(cursor, &mapcount,
-                                               vma, page) == SWAP_MLOCK)
-                                       ret = SWAP_MLOCK;
-                               cursor += CLUSTER_SIZE;
-                               vma->vm_private_data = (void *) cursor;
-                               if ((int)mapcount <= 0)
-                                       goto out;
-                       }
-                       vma->vm_private_data = (void *) max_nl_cursor;
-               }
-               cond_resched();
-               max_nl_cursor += CLUSTER_SIZE;
-       } while (max_nl_cursor <= max_nl_size);
-
-       /*
-        * Don't loop forever (perhaps all the remaining pages are
-        * in locked vmas).  Reset cursor on all unreserved nonlinear
-        * vmas, now forgetting on which ones it had fallen behind.
-        */
-       list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.nonlinear)
-               vma->vm_private_data = NULL;
 out:
        return ret;
 }
@@ -1837,14 +1635,6 @@ static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc)
                        goto done;
        }
 
-       if (!rwc->file_nonlinear)
-               goto done;
-
-       if (list_empty(&mapping->i_mmap_nonlinear))
-               goto done;
-
-       ret = rwc->file_nonlinear(page, mapping, vma);
-
 done:
        mutex_unlock(&mapping->i_mmap_mutex);
        return ret;
diff --git a/mm/swap.c b/mm/swap.c
index 0a42ebf..6c7f5fe 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -1075,10 +1075,8 @@ void __init swap_setup(void)
        int i;
 
        bdi_init(swapper_spaces[0].backing_dev_info);
-       for (i = 0; i < MAX_SWAPFILES; i++) {
+       for (i = 0; i < MAX_SWAPFILES; i++)
                spin_lock_init(&swapper_spaces[i].tree_lock);
-               INIT_LIST_HEAD(&swapper_spaces[i].i_mmap_nonlinear);
-       }
 #endif
 
        /* Use a smaller cluster for small-memory machines */
_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to