From: Mel Gorman <mgor...@suse.de>

Note: Based on "mm/mpol: Use special PROT_NONE to migrate pages"
but     sufficiently different that the signed-off-bys were dropped

Combine our previous _PAGE_NUMA, mpol_misplaced and
migrate_misplaced_page() pieces into an effective migrate on
fault scheme.

Note that (on x86) we rely on PROT_NONE pages being !present and
avoid the TLB flush from try_to_unmap(TTU_MIGRATION). This
greatly improves the page-migration performance.

Based-on-work-by: Peter Zijlstra <a.p.zijls...@chello.nl>
Signed-off-by: Mel Gorman <mgor...@suse.de>
Cc: Rik van Riel <r...@redhat.com>
Cc: Johannes Weiner <han...@cmpxchg.org>
Cc: Hugh Dickins <hu...@google.com>
Cc: Paul Turner <p...@google.com>
Cc: Lee Schermerhorn <lee.schermerh...@hp.com>
Cc: Alex Shi <lkml.a...@gmail.com>
Cc: Srikar Dronamraju <sri...@linux.vnet.ibm.com>
Cc: Aneesh Kumar <aneesh.ku...@linux.vnet.ibm.com>
Cc: Linus Torvalds <torva...@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijls...@chello.nl>
Cc: Andrea Arcangeli <aarca...@redhat.com>
Signed-off-by: Ingo Molnar <mi...@kernel.org>
---
 include/linux/huge_mm.h |  8 ++++----
 mm/huge_memory.c        | 31 ++++++++++++++++++++++++++++---
 mm/memory.c             | 32 +++++++++++++++++++++++++++-----
 3 files changed, 59 insertions(+), 12 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 6cd7dcb..dabb510 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -160,8 +160,8 @@ static inline struct page *compound_trans_head(struct page 
*page)
        return page;
 }
 
-extern int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr,
-                                 pmd_t pmd, pmd_t *pmdp);
+extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct 
*vma,
+                               unsigned long addr, pmd_t pmd, pmd_t *pmdp);
 
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
@@ -200,8 +200,8 @@ static inline int pmd_trans_huge_lock(pmd_t *pmd,
        return 0;
 }
 
-static inline int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long 
addr,
-                                       pmd_t pmd, pmd_t *pmdp)
+static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct 
vm_area_struct *vma,
+                                       unsigned long addr, pmd_t pmd, pmd_t 
*pmdp)
 {
        return 0;
 }
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 900eb1b..5723b55 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -18,6 +18,7 @@
 #include <linux/freezer.h>
 #include <linux/mman.h>
 #include <linux/pagemap.h>
+#include <linux/migrate.h>
 #include <asm/tlb.h>
 #include <asm/pgalloc.h>
 #include "internal.h"
@@ -1019,17 +1020,39 @@ out:
 }
 
 /* NUMA hinting page fault entry point for trans huge pmds */
-int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned long addr,
-                               pmd_t pmd, pmd_t *pmdp)
+int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
+                               unsigned long addr, pmd_t pmd, pmd_t *pmdp)
 {
+       struct page *page = NULL;
        unsigned long haddr = addr & HPAGE_PMD_MASK;
-       struct page *page;
+       int target_nid;
 
        spin_lock(&mm->page_table_lock);
        if (unlikely(!pmd_same(pmd, *pmdp)))
                goto out_unlock;
 
        page = pmd_page(pmd);
+       get_page(page);
+       spin_unlock(&mm->page_table_lock);
+
+       target_nid = mpol_misplaced(page, vma, haddr);
+       if (target_nid == -1)
+               goto clear_pmdnuma;
+
+       /*
+        * Due to lacking code to migrate thp pages, we'll split
+        * (which preserves the special PROT_NONE) and re-take the
+        * fault on the normal pages.
+        */
+       split_huge_page(page);
+       put_page(page);
+       return 0;
+
+clear_pmdnuma:
+       spin_lock(&mm->page_table_lock);
+       if (unlikely(!pmd_same(pmd, *pmdp)))
+               goto out_unlock;
+
        pmd = pmd_mknonnuma(pmd);
        set_pmd_at(mm, haddr, pmdp, pmd);
        VM_BUG_ON(pmd_numa(*pmdp));
@@ -1037,6 +1060,8 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, unsigned 
long addr,
 
 out_unlock:
        spin_unlock(&mm->page_table_lock);
+       if (page)
+               put_page(page);
        return 0;
 }
 
diff --git a/mm/memory.c b/mm/memory.c
index 290b80a..174f006 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -57,6 +57,7 @@
 #include <linux/swapops.h>
 #include <linux/elf.h>
 #include <linux/gfp.h>
+#include <linux/migrate.h>
 
 #include <asm/io.h>
 #include <asm/pgalloc.h>
@@ -3451,8 +3452,9 @@ static int do_nonlinear_fault(struct mm_struct *mm, 
struct vm_area_struct *vma,
 int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
                   unsigned long addr, pte_t pte, pte_t *ptep, pmd_t *pmd)
 {
-       struct page *page;
+       struct page *page = NULL;
        spinlock_t *ptl;
+       int current_nid, target_nid;
 
        /*
        * The "pte" at this point cannot be used safely without
@@ -3465,8 +3467,11 @@ int do_numa_page(struct mm_struct *mm, struct 
vm_area_struct *vma,
        */
        ptl = pte_lockptr(mm, pmd);
        spin_lock(ptl);
-       if (unlikely(!pte_same(*ptep, pte)))
-               goto out_unlock;
+       if (unlikely(!pte_same(*ptep, pte))) {
+               pte_unmap_unlock(ptep, ptl);
+               goto out;
+       }
+
        pte = pte_mknonnuma(pte);
        set_pte_at(mm, addr, ptep, pte);
        update_mmu_cache(vma, addr, ptep);
@@ -3477,8 +3482,25 @@ int do_numa_page(struct mm_struct *mm, struct 
vm_area_struct *vma,
                return 0;
        }
 
-out_unlock:
+       get_page(page);
+       current_nid = page_to_nid(page);
+       target_nid = mpol_misplaced(page, vma, addr);
        pte_unmap_unlock(ptep, ptl);
+       if (target_nid == -1) {
+               /*
+                * Account for the fault against the current node if it not
+                * being replaced regardless of where the page is located.
+                */
+               current_nid = numa_node_id();
+               put_page(page);
+               goto out;
+       }
+
+       /* Migrate to the requested node */
+       if (migrate_misplaced_page(page, target_nid))
+               current_nid = target_nid;
+
+out:
        return 0;
 }
 
@@ -3647,7 +3669,7 @@ retry:
                barrier();
                if (pmd_trans_huge(orig_pmd)) {
                        if (pmd_numa(*pmd))
-                               return do_huge_pmd_numa_page(mm, address,
+                               return do_huge_pmd_numa_page(mm, vma, address,
                                                             orig_pmd, pmd);
 
                        if ((flags & FAULT_FLAG_WRITE) && !pmd_write(orig_pmd)) 
{
-- 
1.7.11.7

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to