From: Zi Yan <z...@nvidia.com>

Unmap different subpages in different sized THPs properly in the
try_to_unmap() function.

Signed-off-by: Zi Yan <z...@nvidia.com>
---
 mm/migrate.c |   2 +-
 mm/rmap.c    | 140 +++++++++++++++++++++++++++++++++++++--------------
 2 files changed, 103 insertions(+), 39 deletions(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index f7e5d88210ee..7deb64d75adb 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -223,7 +223,7 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
 
 #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
                /* PMD-mapped THP migration entry */
-               if (!pvmw.pte) {
+               if (!pvmw.pte && pvmw.pmd) {
                        VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page);
                        remove_migration_pmd(&pvmw, new);
                        continue;
diff --git a/mm/rmap.c b/mm/rmap.c
index 79908cfc518a..39f446a6775d 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1031,7 +1031,7 @@ void page_move_anon_rmap(struct page *page, struct vm_area_struct *vma)
  * __page_set_anon_rmap - set up new anonymous rmap
  * @page:      Page or Hugepage to add to rmap
  * @vma:       VM area to add page to.
- * @address:   User virtual address of the mapping     
+ * @address:   User virtual address of the mapping
  * @exclusive: the page is exclusively owned by the current process
  */
 static void __page_set_anon_rmap(struct page *page,
@@ -1423,7 +1423,9 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                .address = address,
        };
        pte_t pteval;
-       struct page *subpage;
+       pmd_t pmdval;
+       pud_t pudval;
+       struct page *subpage = NULL;
        bool ret = true;
        struct mmu_notifier_range range;
        enum ttu_flags flags = (enum ttu_flags)arg;
@@ -1436,6 +1438,11 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
            is_zone_device_page(page) && !is_device_private_page(page))
                return true;
 
+       if (flags & TTU_SPLIT_HUGE_PUD) {
+               split_huge_pud_address(vma, address,
+                               flags & TTU_SPLIT_FREEZE, page);
+       }
+
        if (flags & TTU_SPLIT_HUGE_PMD) {
                split_huge_pmd_address(vma, address,
                                flags & TTU_SPLIT_FREEZE, page);
@@ -1465,7 +1472,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
        while (page_vma_mapped_walk(&pvmw)) {
 #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
                /* PMD-mapped THP migration entry */
-               if (!pvmw.pte && (flags & TTU_MIGRATION)) {
+               if (!pvmw.pte && pvmw.pmd && (flags & TTU_MIGRATION)) {
                        VM_BUG_ON_PAGE(PageHuge(page) || !PageTransCompound(page), page);
 
                        set_pmd_migration_entry(&pvmw, page);
@@ -1497,9 +1504,14 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                }
 
                /* Unexpected PMD-mapped THP? */
-               VM_BUG_ON_PAGE(!pvmw.pte, page);
 
-               subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
+               if (pvmw.pte)
+                       subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
+               else if (!pvmw.pte && pvmw.pmd)
+                       subpage = page - page_to_pfn(page) + pmd_pfn(*pvmw.pmd);
+               else if (!pvmw.pte && !pvmw.pmd && pvmw.pud)
+                       subpage = page - page_to_pfn(page) + pud_pfn(*pvmw.pud);
+               VM_BUG_ON(!subpage);
                address = pvmw.address;
 
                if (PageHuge(page)) {
@@ -1556,16 +1568,26 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                }
 
                if (!(flags & TTU_IGNORE_ACCESS)) {
-                       if (ptep_clear_flush_young_notify(vma, address,
-                                               pvmw.pte)) {
-                               ret = false;
-                               page_vma_mapped_walk_done(&pvmw);
-                               break;
+                       if ((pvmw.pte &&
+                                ptep_clear_flush_young_notify(vma, address, pvmw.pte)) ||
+                               ((!pvmw.pte && pvmw.pmd) &&
+                                pmdp_clear_flush_young_notify(vma, address, pvmw.pmd)) ||
+                               ((!pvmw.pte && !pvmw.pmd && pvmw.pud) &&
+                                pudp_clear_flush_young_notify(vma, address, pvmw.pud))
+                               ) {
+                                       ret = false;
+                                       page_vma_mapped_walk_done(&pvmw);
+                                       break;
                        }
                }
 
                /* Nuke the page table entry. */
-               flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
+               if (pvmw.pte)
+                       flush_cache_page(vma, address, pte_pfn(*pvmw.pte));
+               else if (!pvmw.pte && pvmw.pmd)
+                       flush_cache_page(vma, address, pmd_pfn(*pvmw.pmd));
+               else if (!pvmw.pte && !pvmw.pmd && pvmw.pud)
+                       flush_cache_page(vma, address, pud_pfn(*pvmw.pud));
                if (should_defer_flush(mm, flags)) {
                        /*
                         * We clear the PTE but do not flush so potentially
@@ -1575,16 +1597,34 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                         * transition on a cached TLB entry is written through
                         * and traps if the PTE is unmapped.
                         */
-                       pteval = ptep_get_and_clear(mm, address, pvmw.pte);
+                       if (pvmw.pte) {
+                       if (pvmw.pte) {
+                               pteval = ptep_get_and_clear(mm, address, pvmw.pte);
+
+                               set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
+                       } else if (!pvmw.pte && pvmw.pmd) {
+                               pmdval = pmdp_huge_get_and_clear(mm, address, pvmw.pmd);
 
-                       set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
+                               set_tlb_ubc_flush_pending(mm, pmd_dirty(pmdval));
+                       } else if (!pvmw.pte && !pvmw.pmd && pvmw.pud) {
+                               pudval = pudp_huge_get_and_clear(mm, address, pvmw.pud);
+
+                               set_tlb_ubc_flush_pending(mm, pud_dirty(pudval));
+                       }
                } else {
-                       pteval = ptep_clear_flush(vma, address, pvmw.pte);
+                       if (pvmw.pte)
+                               pteval = ptep_clear_flush(vma, address, pvmw.pte);
+                       else if (!pvmw.pte && pvmw.pmd)
+                               pmdval = pmdp_huge_clear_flush(vma, address, pvmw.pmd);
+                       else if (!pvmw.pte && !pvmw.pmd && pvmw.pud)
+                               pudval = pudp_huge_clear_flush(vma, address, pvmw.pud);
                }
 
                /* Move the dirty bit to the page. Now the pte is gone. */
-               if (pte_dirty(pteval))
-                       set_page_dirty(page);
+                       if ((pvmw.pte && pte_dirty(pteval)) ||
+                               ((!pvmw.pte && pvmw.pmd) && pmd_dirty(pmdval)) ||
+                               ((!pvmw.pte && !pvmw.pmd && pvmw.pud) && pud_dirty(pudval))
+                               )
+                               set_page_dirty(page);
 
                /* Update high watermark before we lower rss */
                update_hiwater_rss(mm);
@@ -1620,33 +1660,57 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
                } else if (IS_ENABLED(CONFIG_MIGRATION) &&
                                (flags & (TTU_MIGRATION|TTU_SPLIT_FREEZE))) {
                        swp_entry_t entry;
-                       pte_t swp_pte;
 
-                       if (arch_unmap_one(mm, vma, address, pteval) < 0) {
-                               set_pte_at(mm, address, pvmw.pte, pteval);
-                               ret = false;
-                               page_vma_mapped_walk_done(&pvmw);
-                               break;
-                       }
+                       if (pvmw.pte) {
+                               pte_t swp_pte;
 
-                       /*
-                        * Store the pfn of the page in a special migration
-                        * pte. do_swap_page() will wait until the migration
-                        * pte is removed and then restart fault handling.
-                        */
-                       entry = make_migration_entry(subpage,
-                                       pte_write(pteval));
-                       swp_pte = swp_entry_to_pte(entry);
-                       if (pte_soft_dirty(pteval))
-                               swp_pte = pte_swp_mksoft_dirty(swp_pte);
-                       set_pte_at(mm, address, pvmw.pte, swp_pte);
-                       /*
-                        * No need to invalidate here it will synchronize on
-                        * against the special swap migration pte.
-                        */
+                               if (arch_unmap_one(mm, vma, address, pteval) < 0) {
+                                       set_pte_at(mm, address, pvmw.pte, pteval);
+                                       ret = false;
+                                       page_vma_mapped_walk_done(&pvmw);
+                                       break;
+                               }
+
+                               /*
+                                * Store the pfn of the page in a special migration
+                                * pte. do_swap_page() will wait until the migration
+                                * pte is removed and then restart fault handling.
+                                */
+                               entry = make_migration_entry(subpage,
+                                               pte_write(pteval));
+                               swp_pte = swp_entry_to_pte(entry);
+                               if (pte_soft_dirty(pteval))
+                                       swp_pte = pte_swp_mksoft_dirty(swp_pte);
+                               set_pte_at(mm, address, pvmw.pte, swp_pte);
+                               /*
+                                * No need to invalidate here it will synchronize on
+                                * against the special swap migration pte.
+                                */
+                       } else if (!pvmw.pte && pvmw.pmd) {
+                               pmd_t swp_pmd;
+                               /*
+                                * Store the pfn of the page in a special migration
+                                * pte. do_swap_page() will wait until the migration
+                                * pte is removed and then restart fault handling.
+                                */
+                               entry = make_migration_entry(subpage,
+                                               pmd_write(pmdval));
+                               swp_pmd = swp_entry_to_pmd(entry);
+                               if (pmd_soft_dirty(pmdval))
+                                       swp_pmd = pmd_swp_mksoft_dirty(swp_pmd);
+                               set_pmd_at(mm, address, pvmw.pmd, swp_pmd);
+                               /*
+                                * No need to invalidate here it will synchronize on
+                                * against the special swap migration pte.
+                                */
+                       } else if (!pvmw.pte && !pvmw.pmd && pvmw.pud) {
+                               VM_BUG_ON(1);
+                       }
                } else if (PageAnon(page)) {
                        swp_entry_t entry = { .val = page_private(subpage) };
                        pte_t swp_pte;
+
+                       VM_BUG_ON(!pvmw.pte);
                        /*
                         * Store the swap location in the pte.
                         * See handle_pte_fault() ...
-- 
2.20.1

Reply via email to