Commit-ID:  713f937655c4b15131b5a0eae4610918a4febe17
Gitweb:     http://git.kernel.org/tip/713f937655c4b15131b5a0eae4610918a4febe17
Author:     Peter Zijlstra <[email protected]>
AuthorDate: Fri, 12 Oct 2012 19:30:14 +0200
Committer:  Ingo Molnar <[email protected]>
CommitDate: Mon, 15 Oct 2012 14:18:40 +0200

sched/numa/mm: Improve migration

Add THP migration. Extend task_numa_fault() to absorb THP faults.

[ Would be nice if the gents on Cc: expressed their opinion about
  this change. A missing detail might be cgroup page accounting,
  plus the fact that some architectures might cache PMD_NONE pmds
  in their TLBs, needing some extra TLB magic beyond what we already
  do here? ]

Signed-off-by: Peter Zijlstra <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Link: http://lkml.kernel.org/n/[email protected]
[ Significant fixes ]
Signed-off-by: Ingo Molnar <[email protected]>
---
 include/linux/sched.h |    4 +-
 kernel/sched/fair.c   |    4 +-
 mm/huge_memory.c      |  142 +++++++++++++++++++++++++++++++++++++++---------
 mm/migrate.c          |    2 +-
 4 files changed, 120 insertions(+), 32 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 22be2d6..2c3009b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1609,7 +1609,7 @@ static inline int tsk_home_node(struct task_struct *p)
 }
 
 extern void task_numa_placement(void);
-extern void task_numa_fault(int node);
+extern void task_numa_fault(int node, int pages);
 #else
 static inline int tsk_home_node(struct task_struct *p)
 {
@@ -1620,7 +1620,7 @@ static inline void task_numa_placement(void)
 {
 }
 
-static inline void task_numa_fault(int node)
+static inline void task_numa_fault(int node, int pages)
 {
 }
 #endif /* CONFIG_SCHED_NUMA */
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7d522d0..df35c8d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -835,7 +835,7 @@ unsigned int sysctl_sched_numa_settle_count = 2;
 /*
  * Got a PROT_NONE fault for a page on @node.
  */
-void task_numa_fault(int node)
+void task_numa_fault(int node, int pages)
 {
        struct task_struct *p = current;
 
@@ -846,7 +846,7 @@ void task_numa_fault(int node)
                        return;
        }
 
-       p->numa_faults[node]++;
+       p->numa_faults[node] += pages;
 }
 
 void task_numa_placement(void)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d14c8b2..2b65116 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -767,11 +767,13 @@ void do_huge_pmd_prot_none(struct mm_struct *mm, struct vm_area_struct *vma,
                           unsigned int flags, pmd_t entry)
 {
        unsigned long haddr = address & HPAGE_PMD_MASK;
+       struct page *new_page = NULL;
        struct page *page = NULL;
+       int node, lru;
 
        spin_lock(&mm->page_table_lock);
        if (unlikely(!pmd_same(*pmd, entry)))
-               goto out_unlock;
+               goto unlock;
 
        if (unlikely(pmd_trans_splitting(entry))) {
                spin_unlock(&mm->page_table_lock);
@@ -779,44 +781,130 @@ void do_huge_pmd_prot_none(struct mm_struct *mm, struct vm_area_struct *vma,
                return;
        }
 
-#ifdef CONFIG_NUMA
        page = pmd_page(entry);
-       VM_BUG_ON(!PageCompound(page) || !PageHead(page));
+       if (page) {
+               VM_BUG_ON(!PageCompound(page) || !PageHead(page));
 
-       get_page(page);
+               get_page(page);
+               node = mpol_misplaced(page, vma, haddr);
+               if (node != -1)
+                       goto migrate;
+       }
+
+fixup:
+       /* change back to regular protection */
+       entry = pmd_modify(entry, vma->vm_page_prot);
+       set_pmd_at(mm, haddr, pmd, entry);
+       update_mmu_cache(vma, address, entry);
+
+unlock:
        spin_unlock(&mm->page_table_lock);
+       if (page) {
+               task_numa_placement();
+               task_numa_fault(page_to_nid(page), HPAGE_PMD_NR);
+               put_page(page);
+       }
+       return;
 
-       /*
-        * XXX should we serialize against split_huge_page ?
-        */
+migrate:
+       WARN_ON(!(((unsigned long)page->mapping & PAGE_MAPPING_ANON)));
+       WARN_ON((((unsigned long)page->mapping & PAGE_MAPPING_KSM)));
+       BUG_ON(PageSwapCache(page));
+
+       spin_unlock(&mm->page_table_lock);
 
-       if (mpol_misplaced(page, vma, haddr) == -1)
-               goto do_fixup;
+       lock_page(page);
+       spin_lock(&mm->page_table_lock);
+       if (unlikely(!pmd_same(*pmd, entry))) {
+               spin_unlock(&mm->page_table_lock);
+               unlock_page(page);
+               put_page(page);
+               return;
+       }
+       spin_unlock(&mm->page_table_lock);
 
-       /*
-        * Due to lacking code to migrate thp pages, we'll split
-        * (which preserves the special PROT_NONE) and re-take the
-        * fault on the normal pages.
-        */
-       split_huge_page(page);
-       put_page(page);
-       return;
+       task_numa_placement();
+
+       new_page = alloc_pages_node(node,
+           (GFP_TRANSHUGE | GFP_THISNODE) & ~(__GFP_NO_KSWAPD | __GFP_WAIT),
+           HPAGE_PMD_ORDER);
+
+       WARN_ON(PageLRU(new_page));
+
+       if (!new_page)
+               goto alloc_fail;
+
+       lru = PageLRU(page);
+
+       if (lru && isolate_lru_page(page)) /* does an implicit get_page() */
+               goto alloc_fail;
+
+       if (!trylock_page(new_page))
+               BUG();
+
+       /* anon mapping, we can simply copy page->mapping to the new page: */
+       new_page->mapping = page->mapping;
+       new_page->index = page->index;
+
+       migrate_page_copy(new_page, page);
+
+       WARN_ON(PageLRU(new_page));
 
-do_fixup:
        spin_lock(&mm->page_table_lock);
-       if (unlikely(!pmd_same(*pmd, entry)))
-               goto out_unlock;
-#endif
+       if (unlikely(!pmd_same(*pmd, entry))) {
+               spin_unlock(&mm->page_table_lock);
+               if (lru)
+                       putback_lru_page(page);
 
-       /* change back to regular protection */
-       entry = pmd_modify(entry, vma->vm_page_prot);
-       if (pmdp_set_access_flags(vma, haddr, pmd, entry, 1))
-               update_mmu_cache(vma, address, entry);
+               unlock_page(new_page);
+               ClearPageActive(new_page);      /* Set by migrate_page_copy() */
+               new_page->mapping = NULL;
+               put_page(new_page);             /* Free it */
 
-out_unlock:
+               unlock_page(page);
+               put_page(page);                 /* Drop the local reference */
+
+               return;
+       }
+
+       entry = mk_pmd(new_page, vma->vm_page_prot);
+       entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+       entry = pmd_mkhuge(entry);
+
+       page_add_new_anon_rmap(new_page, vma, haddr);
+
+       set_pmd_at(mm, haddr, pmd, entry);
+       update_mmu_cache(vma, address, entry);
+       page_remove_rmap(page);
        spin_unlock(&mm->page_table_lock);
-       if (page)
+
+       put_page(page);                 /* Drop the rmap reference */
+
+       task_numa_fault(node, HPAGE_PMD_NR);
+
+       if (lru)
+               put_page(page);         /* drop the LRU isolation reference */
+
+       unlock_page(new_page);
+       unlock_page(page);
+       put_page(page);                 /* Drop the local reference */
+
+       return;
+
+alloc_fail:
+       if (new_page)
+               put_page(new_page);
+
+       task_numa_fault(page_to_nid(page), HPAGE_PMD_NR);
+       unlock_page(page);
+
+       spin_lock(&mm->page_table_lock);
+       if (unlikely(!pmd_same(*pmd, entry))) {
                put_page(page);
+               page = NULL;
+               goto unlock;
+       }
+       goto fixup;
 }
 
 int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
diff --git a/mm/migrate.c b/mm/migrate.c
index e03ed0b..e3cff03 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -417,7 +417,7 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
  */
 void migrate_page_copy(struct page *newpage, struct page *page)
 {
-       if (PageHuge(page))
+       if (PageHuge(page) || PageTransHuge(page))
                copy_huge_page(newpage, page);
        else
                copy_highpage(newpage, page);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to