From: Peter Xu <pet...@redhat.com>

For both swap and page migration, we use bit 2 of the entry to
identify whether the entry is uffd write-protected.  It plays a similar
role to the existing soft-dirty bit in swap entries, but only for keeping
the uffd-wp tracking for a specific PTE/PMD.

Something special here is that when we recover the uffd-wp bit from a
swap/migration entry into the PTE bit, we also need to take care of the
_PAGE_RW bit and make sure it is cleared; otherwise, even with the
_PAGE_UFFD_WP bit set, we can't trap the write at all.

In change_pte_range() we currently do nothing for uffd if the PTE is a
swap entry.  That can lead to data mismatch if the page that we are going
to write-protect is swapped out when UFFDIO_WRITEPROTECT is issued.  This
patch also applies/removes the uffd-wp bit for swap entries.

Signed-off-by: Peter Xu <pet...@redhat.com>
Signed-off-by: Andrew Morton <a...@linux-foundation.org>
Cc: Andrea Arcangeli <aarca...@redhat.com>
Cc: Bobby Powers <bobbypow...@gmail.com>
Cc: Brian Geffon <bgef...@google.com>
Cc: David Hildenbrand <da...@redhat.com>
Cc: Denis Plotnikov <dplotni...@virtuozzo.com>
Cc: "Dr . David Alan Gilbert" <dgilb...@redhat.com>
Cc: Hugh Dickins <hu...@google.com>
Cc: Jerome Glisse <jgli...@redhat.com>
Cc: Johannes Weiner <han...@cmpxchg.org>
Cc: "Kirill A . Shutemov" <kir...@shutemov.name>
Cc: Martin Cracauer <craca...@cons.org>
Cc: Marty McFadden <mcfadd...@llnl.gov>
Cc: Maya Gokhale <gokha...@llnl.gov>
Cc: Mel Gorman <mgor...@suse.de>
Cc: Mike Kravetz <mike.krav...@oracle.com>
Cc: Mike Rapoport <r...@linux.vnet.ibm.com>
Cc: Pavel Emelyanov <xe...@openvz.org>
Cc: Rik van Riel <r...@redhat.com>
Cc: Shaohua Li <s...@fb.com>
Link: http://lkml.kernel.org/r/20200220163112.11409-11-pet...@redhat.com
Signed-off-by: Linus Torvalds <torva...@linux-foundation.org>

https://jira.sw.ru/browse/PSBM-102938
(cherry picked from commit f45ec5ff16a75f96dac8c89862d75f1d8739efd4)
Signed-off-by: Andrey Ryabinin <aryabi...@virtuozzo.com>
---
 include/linux/swapops.h |  2 ++
 mm/huge_memory.c        |  3 +++
 mm/memory.c             |  8 ++++++++
 mm/migrate.c            |  6 ++++++
 mm/mprotect.c           | 28 +++++++++++++++++-----------
 mm/rmap.c               |  6 ++++++
 6 files changed, 42 insertions(+), 11 deletions(-)

diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 1d3877c39a00..affbbbe7abcb 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -69,6 +69,8 @@ static inline swp_entry_t pte_to_swp_entry(pte_t pte)
 
        if (pte_swp_soft_dirty(pte))
                pte = pte_swp_clear_soft_dirty(pte);
+       if (pte_swp_uffd_wp(pte))
+               pte = pte_swp_clear_uffd_wp(pte);
        arch_entry = __pte_to_swp_entry(pte);
        return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry));
 }
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index b26c2daf3547..78c5eef073dd 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2154,6 +2154,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct 
*vma, pmd_t *pmd,
                write = is_write_migration_entry(entry);
                young = false;
                soft_dirty = pmd_swp_soft_dirty(old_pmd);
+               uffd_wp = pmd_swp_uffd_wp(old_pmd);
        } else {
                page = pmd_page(old_pmd);
                if (pmd_dirty(old_pmd))
@@ -2186,6 +2187,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct 
*vma, pmd_t *pmd,
                        entry = swp_entry_to_pte(swp_entry);
                        if (soft_dirty)
                                entry = pte_swp_mksoft_dirty(entry);
+                       if (uffd_wp)
+                               entry = pte_swp_mkuffd_wp(entry);
                } else {
                        entry = mk_pte(page + i, READ_ONCE(vma->vm_page_prot));
                        entry = maybe_mkwrite(entry, vma);
diff --git a/mm/memory.c b/mm/memory.c
index 7e0189d729e3..c256bf4d297c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -985,6 +985,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct 
*src_mm,
                                pte = swp_entry_to_pte(entry);
                                if (pte_swp_soft_dirty(*src_pte))
                                        pte = pte_swp_mksoft_dirty(pte);
+                               if (pte_swp_uffd_wp(*src_pte))
+                                       pte = pte_swp_mkuffd_wp(pte);
                                set_pte_at(src_mm, addr, src_pte, pte);
                        }
                } else if (is_device_private_entry(entry)) {
@@ -1014,6 +1016,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct 
*src_mm,
                            is_cow_mapping(vm_flags)) {
                                make_device_private_entry_read(&entry);
                                pte = swp_entry_to_pte(entry);
+                               if (pte_swp_uffd_wp(*src_pte))
+                                       pte = pte_swp_mkuffd_wp(pte);
                                set_pte_at(src_mm, addr, src_pte, pte);
                        }
                }
@@ -3079,6 +3083,10 @@ int do_swap_page(struct vm_fault *vmf)
        flush_icache_page(vma, page);
        if (pte_swp_soft_dirty(vmf->orig_pte))
                pte = pte_mksoft_dirty(pte);
+       if (pte_swp_uffd_wp(vmf->orig_pte)) {
+               pte = pte_mkuffd_wp(pte);
+               pte = pte_wrprotect(pte);
+       }
        set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte);
        arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte);
        vmf->orig_pte = pte;
diff --git a/mm/migrate.c b/mm/migrate.c
index cc06cfef2fa1..1814dea49329 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -241,11 +241,15 @@ static bool remove_migration_pte(struct page *page, 
struct vm_area_struct *vma,
                entry = pte_to_swp_entry(*pvmw.pte);
                if (is_write_migration_entry(entry))
                        pte = maybe_mkwrite(pte, vma);
+               else if (pte_swp_uffd_wp(*pvmw.pte))
+                       pte = pte_mkuffd_wp(pte);
 
                if (unlikely(is_zone_device_page(new))) {
                        if (is_device_private_page(new)) {
                                entry = make_device_private_entry(new, 
pte_write(pte));
                                pte = swp_entry_to_pte(entry);
+                               if (pte_swp_uffd_wp(*pvmw.pte))
+                                       pte = pte_mkuffd_wp(pte);
                        } else if (is_device_public_page(new)) {
                                pte = pte_mkdevmap(pte);
                                flush_dcache_page(new);
@@ -2324,6 +2328,8 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
                        swp_pte = swp_entry_to_pte(entry);
                        if (pte_soft_dirty(pte))
                                swp_pte = pte_swp_mksoft_dirty(swp_pte);
+                       if (pte_uffd_wp(pte))
+                               swp_pte = pte_swp_mkuffd_wp(swp_pte);
                        set_pte_at(mm, addr, ptep, swp_pte);
 
                        /*
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 2df60e64f139..475e18ba7131 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -140,11 +140,11 @@ static unsigned long change_pte_range(struct 
vm_area_struct *vma, pmd_t *pmd,
                        }
                        ptep_modify_prot_commit(mm, addr, pte, ptent);
                        pages++;
-               } else if (IS_ENABLED(CONFIG_MIGRATION)) {
+               } else if (is_swap_pte(oldpte)) {
                        swp_entry_t entry = pte_to_swp_entry(oldpte);
+                       pte_t newpte;
 
                        if (is_write_migration_entry(entry)) {
-                               pte_t newpte;
                                /*
                                 * A protection check is difficult so
                                 * just be safe and disable write
@@ -153,22 +153,28 @@ static unsigned long change_pte_range(struct 
vm_area_struct *vma, pmd_t *pmd,
                                newpte = swp_entry_to_pte(entry);
                                if (pte_swp_soft_dirty(oldpte))
                                        newpte = pte_swp_mksoft_dirty(newpte);
-                               set_pte_at(mm, addr, pte, newpte);
-
-                               pages++;
-                       }
-
-                       if (is_write_device_private_entry(entry)) {
-                               pte_t newpte;
-
+                               if (pte_swp_uffd_wp(oldpte))
+                                       newpte = pte_swp_mkuffd_wp(newpte);
+                       } else if (is_write_device_private_entry(entry)) {
                                /*
                                 * We do not preserve soft-dirtiness. See
                                 * copy_one_pte() for explanation.
                                 */
                                make_device_private_entry_read(&entry);
                                newpte = swp_entry_to_pte(entry);
-                               set_pte_at(mm, addr, pte, newpte);
+                               if (pte_swp_uffd_wp(oldpte))
+                                       newpte = pte_swp_mkuffd_wp(newpte);
+                       } else {
+                               newpte = oldpte;
+                       }
 
+                       if (uffd_wp)
+                               newpte = pte_swp_mkuffd_wp(newpte);
+                       else if (uffd_wp_resolve)
+                               newpte = pte_swp_clear_uffd_wp(newpte);
+
+                       if (!pte_same(oldpte, newpte)) {
+                               set_pte_at(mm, addr, pte, newpte);
                                pages++;
                        }
                }
diff --git a/mm/rmap.c b/mm/rmap.c
index eb477809a5c0..d9f37eccbc02 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1427,6 +1427,8 @@ static bool try_to_unmap_one(struct page *page, struct 
vm_area_struct *vma,
                        swp_pte = swp_entry_to_pte(entry);
                        if (pte_soft_dirty(pteval))
                                swp_pte = pte_swp_mksoft_dirty(swp_pte);
+                       if (pte_uffd_wp(pteval))
+                               swp_pte = pte_swp_mkuffd_wp(swp_pte);
                        set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
                        /*
                         * No need to invalidate here it will synchronize on
@@ -1519,6 +1521,8 @@ static bool try_to_unmap_one(struct page *page, struct 
vm_area_struct *vma,
                        swp_pte = swp_entry_to_pte(entry);
                        if (pte_soft_dirty(pteval))
                                swp_pte = pte_swp_mksoft_dirty(swp_pte);
+                       if (pte_uffd_wp(pteval))
+                               swp_pte = pte_swp_mkuffd_wp(swp_pte);
                        set_pte_at(mm, address, pvmw.pte, swp_pte);
                        /*
                         * No need to invalidate here it will synchronize on
@@ -1585,6 +1589,8 @@ static bool try_to_unmap_one(struct page *page, struct 
vm_area_struct *vma,
                        swp_pte = swp_entry_to_pte(entry);
                        if (pte_soft_dirty(pteval))
                                swp_pte = pte_swp_mksoft_dirty(swp_pte);
+                       if (pte_uffd_wp(pteval))
+                               swp_pte = pte_swp_mkuffd_wp(swp_pte);
                        set_pte_at(mm, address, pvmw.pte, swp_pte);
                        /* Invalidate as we cleared the pte */
                        mmu_notifier_invalidate_range(mm, address,
-- 
2.25.3

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to