The commit is pushed to "branch-rh8-4.18.0-80.1.2.vz8.3.x-ovz" and will appear 
at https://src.openvz.org/scm/ovz/vzkernel.git
after rh8-4.18.0-80.1.2.vz8.3.6
------>
commit bd796b3cdc26ff1da8c5bfe8b462c720304e0c8f
Author: Andrea Arcangeli <aarca...@redhat.com>
Date:   Mon Apr 20 10:34:33 2020 +0300

    userfaultfd: wp: add UFFDIO_COPY_MODE_WP
    
    This allows UFFDIO_COPY to map pages write-protected.
    
    [pet...@redhat.com: switch to VM_WARN_ON_ONCE in mfill_atomic_pte; add brackets
     around "dst_vma->vm_flags & VM_WRITE"; fix wordings in comments and
     commit messages]
    Signed-off-by: Andrea Arcangeli <aarca...@redhat.com>
    Signed-off-by: Peter Xu <pet...@redhat.com>
    Signed-off-by: Andrew Morton <a...@linux-foundation.org>
    Reviewed-by: Jerome Glisse <jgli...@redhat.com>
    Reviewed-by: Mike Rapoport <r...@linux.vnet.ibm.com>
    Cc: Bobby Powers <bobbypow...@gmail.com>
    Cc: Brian Geffon <bgef...@google.com>
    Cc: David Hildenbrand <da...@redhat.com>
    Cc: Denis Plotnikov <dplotni...@virtuozzo.com>
    Cc: "Dr . David Alan Gilbert" <dgilb...@redhat.com>
    Cc: Hugh Dickins <hu...@google.com>
    Cc: Johannes Weiner <han...@cmpxchg.org>
    Cc: "Kirill A . Shutemov" <kir...@shutemov.name>
    Cc: Martin Cracauer <craca...@cons.org>
    Cc: Marty McFadden <mcfadd...@llnl.gov>
    Cc: Maya Gokhale <gokha...@llnl.gov>
    Cc: Mel Gorman <mgor...@suse.de>
    Cc: Mike Kravetz <mike.krav...@oracle.com>
    Cc: Pavel Emelyanov <xe...@openvz.org>
    Cc: Rik van Riel <r...@redhat.com>
    Cc: Shaohua Li <s...@fb.com>
    Link: http://lkml.kernel.org/r/20200220163112.11409-6-pet...@redhat.com
    Signed-off-by: Linus Torvalds <torva...@linux-foundation.org>
    
    https://jira.sw.ru/browse/PSBM-102938
    (cherry picked from commit 72981e0e7b609c741d7764cc920c8fec00920bd5)
    Signed-off-by: Andrey Ryabinin <aryabi...@virtuozzo.com>
---
 fs/userfaultfd.c                 |  5 +++--
 include/linux/userfaultfd_k.h    |  2 +-
 include/uapi/linux/userfaultfd.h | 11 ++++++-----
 mm/userfaultfd.c                 | 36 +++++++++++++++++++++++++-----------
 4 files changed, 35 insertions(+), 19 deletions(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 0bfbe5182773..5dde50cd02f8 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1715,11 +1715,12 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
        ret = -EINVAL;
        if (uffdio_copy.src + uffdio_copy.len <= uffdio_copy.src)
                goto out;
-       if (uffdio_copy.mode & ~UFFDIO_COPY_MODE_DONTWAKE)
+       if (uffdio_copy.mode & ~(UFFDIO_COPY_MODE_DONTWAKE|UFFDIO_COPY_MODE_WP))
                goto out;
        if (mmget_not_zero(ctx->mm)) {
                ret = mcopy_atomic(ctx->mm, uffdio_copy.dst, uffdio_copy.src,
-                                  uffdio_copy.len, &ctx->mmap_changing);
+                                  uffdio_copy.len, &ctx->mmap_changing,
+                                  uffdio_copy.mode);
                mmput(ctx->mm);
        } else {
                return -ESRCH;
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 8fc07c5af632..8776b913d2e9 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -34,7 +34,7 @@ extern int handle_userfault(struct vm_fault *vmf, unsigned long reason);
 
 extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
                            unsigned long src_start, unsigned long len,
-                           bool *mmap_changing);
+                           bool *mmap_changing, __u64 mode);
 extern ssize_t mfill_zeropage(struct mm_struct *dst_mm,
                              unsigned long dst_start,
                              unsigned long len,
diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index 48f1a7c2f1f0..340f23bc251d 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -203,13 +203,14 @@ struct uffdio_copy {
        __u64 dst;
        __u64 src;
        __u64 len;
+#define UFFDIO_COPY_MODE_DONTWAKE              ((__u64)1<<0)
        /*
-        * There will be a wrprotection flag later that allows to map
-        * pages wrprotected on the fly. And such a flag will be
-        * available if the wrprotection ioctl are implemented for the
-        * range according to the uffdio_register.ioctls.
+        * UFFDIO_COPY_MODE_WP will map the page write protected on
+        * the fly.  UFFDIO_COPY_MODE_WP is available only if the
+        * write protected ioctl is implemented for the range
+        * according to the uffdio_register.ioctls.
         */
-#define UFFDIO_COPY_MODE_DONTWAKE              ((__u64)1<<0)
+#define UFFDIO_COPY_MODE_WP                    ((__u64)1<<1)
        __u64 mode;
 
        /*
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 8d26d35f1203..be9fc77575de 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -55,7 +55,8 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
                            struct vm_area_struct *dst_vma,
                            unsigned long dst_addr,
                            unsigned long src_addr,
-                           struct page **pagep)
+                           struct page **pagep,
+                           bool wp_copy)
 {
        struct mem_cgroup *memcg;
        pte_t _dst_pte, *dst_pte;
@@ -101,9 +102,9 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
        if (mem_cgroup_try_charge(page, dst_mm, GFP_KERNEL, &memcg, false))
                goto out_release;
 
-       _dst_pte = mk_pte(page, dst_vma->vm_page_prot);
-       if (dst_vma->vm_flags & VM_WRITE)
-               _dst_pte = pte_mkwrite(pte_mkdirty(_dst_pte));
+       _dst_pte = pte_mkdirty(mk_pte(page, dst_vma->vm_page_prot));
+       if ((dst_vma->vm_flags & VM_WRITE) && !wp_copy)
+               _dst_pte = pte_mkwrite(_dst_pte);
 
        dst_pte = pte_offset_map_lock(dst_mm, dst_pmd, dst_addr, &ptl);
        if (dst_vma->vm_file) {
@@ -418,7 +419,8 @@ static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
                                                unsigned long dst_addr,
                                                unsigned long src_addr,
                                                struct page **page,
-                                               bool zeropage)
+                                               bool zeropage,
+                                               bool wp_copy)
 {
        ssize_t err;
 
@@ -435,11 +437,13 @@ static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
        if (!(dst_vma->vm_flags & VM_SHARED)) {
                if (!zeropage)
                        err = mcopy_atomic_pte(dst_mm, dst_pmd, dst_vma,
-                                              dst_addr, src_addr, page);
+                                              dst_addr, src_addr, page,
+                                              wp_copy);
                else
                        err = mfill_zeropage_pte(dst_mm, dst_pmd,
                                                 dst_vma, dst_addr);
        } else {
+               VM_WARN_ON_ONCE(wp_copy);
                if (!zeropage)
                        err = shmem_mcopy_atomic_pte(dst_mm, dst_pmd,
                                                     dst_vma, dst_addr,
@@ -457,7 +461,8 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
                                              unsigned long src_start,
                                              unsigned long len,
                                              bool zeropage,
-                                             bool *mmap_changing)
+                                             bool *mmap_changing,
+                                             __u64 mode)
 {
        struct vm_area_struct *dst_vma;
        ssize_t err;
@@ -465,6 +470,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
        unsigned long src_addr, dst_addr;
        long copied;
        struct page *page;
+       bool wp_copy;
 
        /*
         * Sanitize the command parameters:
@@ -510,6 +516,14 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
            dst_vma->vm_flags & VM_SHARED))
                goto out_unlock;
 
+       /*
+        * validate 'mode' now that we know the dst_vma: don't allow
+        * a wrprotect copy if the userfaultfd didn't register as WP.
+        */
+       wp_copy = mode & UFFDIO_COPY_MODE_WP;
+       if (wp_copy && !(dst_vma->vm_flags & VM_UFFD_WP))
+               goto out_unlock;
+
        /*
         * If this is a HUGETLB vma, pass off to appropriate routine
         */
@@ -565,7 +579,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
                BUG_ON(pmd_trans_huge(*dst_pmd));
 
                err = mfill_atomic_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
-                                      src_addr, &page, zeropage);
+                                      src_addr, &page, zeropage, wp_copy);
                cond_resched();
 
                if (unlikely(err == -ENOENT)) {
@@ -612,14 +626,14 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
 
 ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
                     unsigned long src_start, unsigned long len,
-                    bool *mmap_changing)
+                    bool *mmap_changing, __u64 mode)
 {
        return __mcopy_atomic(dst_mm, dst_start, src_start, len, false,
-                             mmap_changing);
+                             mmap_changing, mode);
 }
 
 ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start,
                       unsigned long len, bool *mmap_changing)
 {
-       return __mcopy_atomic(dst_mm, start, 0, len, true, mmap_changing);
+       return __mcopy_atomic(dst_mm, start, 0, len, true, mmap_changing, 0);
 }
_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to