Some architectures may need an explicit IPI when clearing the pmd
on collapse. Add a new function, pmdp_collapse_flush(), which an
architecture can override. After this change, pmdp_clear_flush() is
used only in the THP case, to invalidate a huge page pte.

Signed-off-by: Aneesh Kumar K.V <aneesh.ku...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/pgtable-ppc64.h |  4 ++
 arch/powerpc/mm/pgtable_64.c             | 77 ++++++++++++++++----------------
 include/asm-generic/pgtable.h            |  9 ++++
 mm/huge_memory.c                         |  2 +-
 4 files changed, 53 insertions(+), 39 deletions(-)

diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index ff275443a040..655dde8e9683 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -562,6 +562,10 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 extern void pmdp_splitting_flush_notify(struct vm_area_struct *vma,
                                        unsigned long address, pmd_t *pmdp);
 
+#define __HAVE_ARCH_PMDP_COLLAPSE_FLUSH
+extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
+                                unsigned long address, pmd_t *pmdp);
+
 #define __HAVE_ARCH_PGTABLE_DEPOSIT
 extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
                                       pgtable_t pgtable);
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 89b356250be3..fa49e2ff042b 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -558,45 +558,9 @@ unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
 pmd_t pmdp_clear_flush(struct vm_area_struct *vma, unsigned long address,
                       pmd_t *pmdp)
 {
-       pmd_t pmd;
-
        VM_BUG_ON(address & ~HPAGE_PMD_MASK);
-       if (pmd_trans_huge(*pmdp)) {
-               pmd = pmdp_get_and_clear(vma->vm_mm, address, pmdp);
-       } else {
-               /*
-                * khugepaged calls this for normal pmd
-                */
-               pmd = *pmdp;
-               pmd_clear(pmdp);
-               /*
-                * Wait for all pending hash_page to finish. This is needed
-                * in case of subpage collapse. When we collapse normal pages
-                * to hugepage, we first clear the pmd, then invalidate all
-                * the PTE entries. The assumption here is that any low level
-                * page fault will see a none pmd and take the slow path that
-                * will wait on mmap_sem. But we could very well be in a
-                * hash_page with local ptep pointer value. Such a hash page
-                * can result in adding new HPTE entries for normal subpages.
-                * That means we could be modifying the page content as we
-                * copy them to a huge page. So wait for parallel hash_page
-                * to finish before invalidating HPTE entries. We can do this
-                * by sending an IPI to all the cpus and executing a dummy
-                * function there.
-                */
-               kick_all_cpus_sync();
-               /*
-                * Now invalidate the hpte entries in the range
-                * covered by pmd. This make sure we take a
-                * fault and will find the pmd as none, which will
-                * result in a major fault which takes mmap_sem and
-                * hence wait for collapse to complete. Without this
-                * the __collapse_huge_page_copy can result in copying
-                * the old content.
-                */
-               flush_tlb_pmd_range(vma->vm_mm, &pmd, address);
-       }
-       return pmd;
+       VM_BUG_ON(!pmd_trans_huge(*pmdp));
+       return pmdp_get_and_clear(vma->vm_mm, address, pmdp);
 }
 
 int pmdp_test_and_clear_young(struct vm_area_struct *vma,
@@ -641,6 +605,43 @@ void pmdp_splitting_flush_notify(struct vm_area_struct *vma,
        kick_all_cpus_sync();
 }
 
+pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
+                         pmd_t *pmdp)
+{
+       pmd_t pmd;
+
+       VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+       pmd = *pmdp;
+       pmd_clear(pmdp);
+       /*
+        * Wait for all pending hash_page to finish. This is needed
+        * in case of subpage collapse. When we collapse normal pages
+        * to hugepage, we first clear the pmd, then invalidate all
+        * the PTE entries. The assumption here is that any low level
+        * page fault will see a none pmd and take the slow path that
+        * will wait on mmap_sem. But we could very well be in a
+        * hash_page with local ptep pointer value. Such a hash page
+        * can result in adding new HPTE entries for normal subpages.
+        * That means we could be modifying the page content as we
+        * copy them to a huge page. So wait for parallel hash_page
+        * to finish before invalidating HPTE entries. We can do this
+        * by sending an IPI to all the cpus and executing a dummy
+        * function there.
+        */
+       kick_all_cpus_sync();
+       /*
+        * Now invalidate the hpte entries in the range
+        * covered by pmd. This make sure we take a
+        * fault and will find the pmd as none, which will
+        * result in a major fault which takes mmap_sem and
+        * hence wait for collapse to complete. Without this
+        * the __collapse_huge_page_copy can result in copying
+        * the old content.
+        */
+       flush_tlb_pmd_range(vma->vm_mm, &pmd, address);
+       return pmd;
+}
+
 /*
  * We want to put the pgtable in pmd and use pgtable for tracking
  * the base page size hptes
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index d091a666f5b1..2e1e4653ae7c 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -189,6 +189,15 @@ extern void pmdp_splitting_flush_notify(struct vm_area_struct *vma,
                                        unsigned long address, pmd_t *pmdp);
 #endif
 
+#ifndef __HAVE_ARCH_PMDP_COLLAPSE_FLUSH
+static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
+                                      unsigned long address,
+                                      pmd_t *pmdp)
+{
+       return pmdp_clear_flush(vma, address, pmdp);
+}
+#endif
+
 #ifndef __HAVE_ARCH_PGTABLE_DEPOSIT
 extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
                                       pgtable_t pgtable);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 81e9578bf43a..30c1b46fcf6d 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2187,7 +2187,7 @@ static void collapse_huge_page(struct mm_struct *mm,
         * huge and small TLB entries for the same virtual address
         * to avoid the risk of CPU bugs in that area.
         */
-       _pmd = pmdp_clear_flush(vma, address, pmd);
+       _pmd = pmdp_collapse_flush(vma, address, pmd);
        spin_unlock(pmd_ptl);
        mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to