From: Andrea Arcangeli <aarca...@redhat.com>

This allows UFFDIO_COPY to map pages write-protected.

Signed-off-by: Andrea Arcangeli <aarca...@redhat.com>
[peterx: switch to VM_WARN_ON_ONCE in mfill_atomic_pte; add brackets
 around "dst_vma->vm_flags & VM_WRITE"; fix wordings in comments and
 commit messages]
Reviewed-by: Jerome Glisse <jgli...@redhat.com>
Reviewed-by: Mike Rapoport <r...@linux.vnet.ibm.com>
Signed-off-by: Peter Xu <pet...@redhat.com>
---
 fs/userfaultfd.c                 |  5 +++--
 include/linux/userfaultfd_k.h    |  2 +-
 include/uapi/linux/userfaultfd.h | 11 +++++-----
 mm/userfaultfd.c                 | 36 ++++++++++++++++++++++----------
 4 files changed, 35 insertions(+), 19 deletions(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 5dbef45ecbf5..c594945ad5bf 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1694,11 +1694,12 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
        ret = -EINVAL;
        if (uffdio_copy.src + uffdio_copy.len <= uffdio_copy.src)
                goto out;
-       if (uffdio_copy.mode & ~UFFDIO_COPY_MODE_DONTWAKE)
+       if (uffdio_copy.mode & ~(UFFDIO_COPY_MODE_DONTWAKE|UFFDIO_COPY_MODE_WP))
                goto out;
        if (mmget_not_zero(ctx->mm)) {
                ret = mcopy_atomic(ctx->mm, uffdio_copy.dst, uffdio_copy.src,
-                                  uffdio_copy.len, &ctx->mmap_changing);
+                                  uffdio_copy.len, &ctx->mmap_changing,
+                                  uffdio_copy.mode);
                mmput(ctx->mm);
        } else {
                return -ESRCH;
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 7b91b76aac58..dcd33172b728 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -36,7 +36,7 @@ extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);
 
 extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
                            unsigned long src_start, unsigned long len,
-                           bool *mmap_changing);
+                           bool *mmap_changing, __u64 mode);
 extern ssize_t mfill_zeropage(struct mm_struct *dst_mm,
                              unsigned long dst_start,
                              unsigned long len,
diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index 48f1a7c2f1f0..340f23bc251d 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -203,13 +203,14 @@ struct uffdio_copy {
        __u64 dst;
        __u64 src;
        __u64 len;
+#define UFFDIO_COPY_MODE_DONTWAKE              ((__u64)1<<0)
        /*
-        * There will be a wrprotection flag later that allows to map
-        * pages wrprotected on the fly. And such a flag will be
-        * available if the wrprotection ioctl are implemented for the
-        * range according to the uffdio_register.ioctls.
+        * UFFDIO_COPY_MODE_WP will map the page write protected on
+        * the fly.  UFFDIO_COPY_MODE_WP is available only if the
+        * write protected ioctl is implemented for the range
+        * according to the uffdio_register.ioctls.
         */
-#define UFFDIO_COPY_MODE_DONTWAKE              ((__u64)1<<0)
+#define UFFDIO_COPY_MODE_WP                    ((__u64)1<<1)
        __u64 mode;
 
        /*
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 9932d5755e4c..c8e7846e9b7e 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -25,7 +25,8 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
                            struct vm_area_struct *dst_vma,
                            unsigned long dst_addr,
                            unsigned long src_addr,
-                           struct page **pagep)
+                           struct page **pagep,
+                           bool wp_copy)
 {
        struct mem_cgroup *memcg;
        pte_t _dst_pte, *dst_pte;
@@ -71,9 +72,9 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
        if (mem_cgroup_try_charge(page, dst_mm, GFP_KERNEL, &memcg, false))
                goto out_release;
 
-       _dst_pte = mk_pte(page, dst_vma->vm_page_prot);
-       if (dst_vma->vm_flags & VM_WRITE)
-               _dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));
+       _dst_pte = pte_mkdirty(mk_pte(page, dst_vma->vm_page_prot));
+       if ((dst_vma->vm_flags & VM_WRITE) && !wp_copy)
+               _dst_pte = pte_mkwrite(_dst_pte);
 
        dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
        if (dst_vma->vm_file) {
@@ -398,7 +399,8 @@ static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
                                                unsigned long dst_addr,
                                                unsigned long src_addr,
                                                struct page **page,
-                                               bool zeropage)
+                                               bool zeropage,
+                                               bool wp_copy)
 {
        ssize_t err;
 
@@ -415,11 +417,13 @@ static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
        if (!(dst_vma->vm_flags & VM_SHARED)) {
                if (!zeropage)
                        err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
-                                              dst_addr, src_addr, page);
+                                              dst_addr, src_addr, page,
+                                              wp_copy);
                else
                        err = mfill_zeropage_pte(dst_mm, dst_pmd,
                                                 dst_vma, dst_addr);
        } else {
+               VM_WARN_ON_ONCE(wp_copy);
                if (!zeropage)
                        err = shmem_mcopy_atomic_pte(dst_mm, dst_pmd,
                                                     dst_vma, dst_addr,
@@ -437,7 +441,8 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
                                              unsigned long src_start,
                                              unsigned long len,
                                              bool zeropage,
-                                             bool *mmap_changing)
+                                             bool *mmap_changing,
+                                             __u64 mode)
 {
        struct vm_area_struct *dst_vma;
        ssize_t err;
@@ -445,6 +450,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
        unsigned long src_addr, dst_addr;
        long copied;
        struct page *page;
+       bool wp_copy;
 
        /*
         * Sanitize the command parameters:
@@ -501,6 +507,14 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
            dst_vma->vm_flags & VM_SHARED))
                goto out_unlock;
 
+       /*
+        * validate 'mode' now that we know the dst_vma: don't allow
+        * a wrprotect copy if the userfaultfd didn't register as WP.
+        */
+       wp_copy = mode & UFFDIO_COPY_MODE_WP;
+       if (wp_copy && !(dst_vma->vm_flags & VM_UFFD_WP))
+               goto out_unlock;
+
        /*
         * If this is a HUGETLB vma, pass off to appropriate routine
         */
@@ -556,7 +570,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
                BUG_ON(pmd_trans_huge(*dst_pmd));
 
                err = mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
-                                      src_addr, &page, zeropage);
+                                      src_addr, &page, zeropage, wp_copy);
                cond_resched();
 
                if (unlikely(err == -ENOENT)) {
@@ -603,14 +617,14 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
 
 ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
                     unsigned long src_start, unsigned long len,
-                    bool *mmap_changing)
+                    bool *mmap_changing, __u64 mode)
 {
        return __mcopy_atomic(dst_mm, dst_start, src_start, len, false,
-                             mmap_changing);
+                             mmap_changing, mode);
 }
 
 ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start,
                       unsigned long len, bool *mmap_changing)
 {
-       return __mcopy_atomic(dst_mm, start, 0, len, true, mmap_changing);
+       return __mcopy_atomic(dst_mm, start, 0, len, true, mmap_changing, 0);
 }
-- 
2.21.0

Reply via email to