Firstly, pass the wp_copy variable into hugetlb_mcopy_atomic_pte() throughout
the stack.  Then, apply the UFFD_WP bit if UFFDIO_COPY_MODE_WP is specified
with UFFDIO_COPY.  Introduce huge_pte_mkuffd_wp() for it.

Note that similar to how we've handled shmem, we'd better keep setting the
dirty bit even if UFFDIO_COPY_MODE_WP is provided, so that the core mm will
know this page contains valid data and never drop it.

Signed-off-by: Peter Xu <pet...@redhat.com>
---
 include/asm-generic/hugetlb.h |  5 +++++
 include/linux/hugetlb.h       |  6 ++++--
 mm/hugetlb.c                  | 22 +++++++++++++++++-----
 mm/userfaultfd.c              | 12 ++++++++----
 4 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h
index 8e1e6244a89d..548212eccbd6 100644
--- a/include/asm-generic/hugetlb.h
+++ b/include/asm-generic/hugetlb.h
@@ -27,6 +27,11 @@ static inline pte_t huge_pte_mkdirty(pte_t pte)
        return pte_mkdirty(pte);
 }
 
+static inline pte_t huge_pte_mkuffd_wp(pte_t pte)
+{
+       return pte_mkuffd_wp(pte);
+}
+
 static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot)
 {
        return pte_modify(pte, newprot);
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index a7f7d5f328dc..ef8d2b8427b1 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -141,7 +141,8 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, 
pte_t *dst_pte,
                                unsigned long dst_addr,
                                unsigned long src_addr,
                                enum mcopy_atomic_mode mode,
-                               struct page **pagep);
+                               struct page **pagep,
+                               bool wp_copy);
 #endif /* CONFIG_USERFAULTFD */
 bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
                                                struct vm_area_struct *vma,
@@ -321,7 +322,8 @@ static inline int hugetlb_mcopy_atomic_pte(struct mm_struct 
*dst_mm,
                                                unsigned long dst_addr,
                                                unsigned long src_addr,
                                                enum mcopy_atomic_mode mode,
-                                               struct page **pagep)
+                                               struct page **pagep,
+                                               bool wp_copy)
 {
        BUG();
        return 0;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index def2c7ddf3ae..f0e55b341ebd 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4725,7 +4725,8 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
                            unsigned long dst_addr,
                            unsigned long src_addr,
                            enum mcopy_atomic_mode mode,
-                           struct page **pagep)
+                           struct page **pagep,
+                           bool wp_copy)
 {
        bool is_continue = (mode == MCOPY_ATOMIC_CONTINUE);
        struct address_space *mapping;
@@ -4822,17 +4823,28 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
                hugepage_add_new_anon_rmap(page, dst_vma, dst_addr);
        }
 
-       /* For CONTINUE on a non-shared VMA, don't set VM_WRITE for CoW. */
-       if (is_continue && !vm_shared)
+       /*
+        * For either: (1) CONTINUE on a non-shared VMA, or (2) UFFDIO_COPY
+        * with wp flag set, don't set pte write bit.
+        */
+       if (wp_copy || (is_continue && !vm_shared))
                writable = 0;
        else
                writable = dst_vma->vm_flags & VM_WRITE;
 
        _dst_pte = make_huge_pte(dst_vma, page, writable);
-       if (writable)
-               _dst_pte = huge_pte_mkdirty(_dst_pte);
+       /*
+        * Always mark UFFDIO_COPY page dirty; note that this may not be
+        * extremely important for hugetlbfs for now since swapping is not
+        * supported, but we should still be clear in that this page cannot be
+        * thrown away at will, even if write bit not set.
+        */
+       _dst_pte = huge_pte_mkdirty(_dst_pte);
        _dst_pte = pte_mkyoung(_dst_pte);
 
+       if (wp_copy)
+               _dst_pte = huge_pte_mkuffd_wp(_dst_pte);
+
        set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
 
        (void)huge_ptep_set_access_flags(dst_vma, dst_addr, dst_pte, _dst_pte,
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 0963e0d9ed20..78471ae3d25c 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -207,7 +207,8 @@ static __always_inline ssize_t 
__mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
                                              unsigned long dst_start,
                                              unsigned long src_start,
                                              unsigned long len,
-                                             enum mcopy_atomic_mode mode)
+                                             enum mcopy_atomic_mode mode,
+                                             bool wp_copy)
 {
        int vm_alloc_shared = dst_vma->vm_flags & VM_SHARED;
        int vm_shared = dst_vma->vm_flags & VM_SHARED;
@@ -304,7 +305,8 @@ static __always_inline ssize_t 
__mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
                }
 
                err = hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma,
-                                              dst_addr, src_addr, mode, &page);
+                                              dst_addr, src_addr, mode, &page,
+                                              wp_copy);
 
                mutex_unlock(&hugetlb_fault_mutex_table[hash]);
                i_mmap_unlock_read(mapping);
@@ -406,7 +408,8 @@ extern ssize_t __mcopy_atomic_hugetlb(struct mm_struct 
*dst_mm,
                                      unsigned long dst_start,
                                      unsigned long src_start,
                                      unsigned long len,
-                                     enum mcopy_atomic_mode mode);
+                                     enum mcopy_atomic_mode mode,
+                                     bool wp_copy);
 #endif /* CONFIG_HUGETLB_PAGE */
 
 static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
@@ -527,7 +530,8 @@ static __always_inline ssize_t __mcopy_atomic(struct 
mm_struct *dst_mm,
         */
        if (is_vm_hugetlb_page(dst_vma))
                return  __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start,
-                                               src_start, len, mcopy_mode);
+                                              src_start, len, mcopy_mode,
+                                              wp_copy);
 
        if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
                goto out_unlock;
-- 
2.26.2

Reply via email to