Expand the userfaultfd_register/unregister routines to allow VM_HUGETLB
vmas.  huge page alignment checking is performed after a VM_HUGETLB
vma is encountered.

Also, since there is no UFFDIO_ZEROPAGE support for huge pages do not
return that as a valid ioctl method for huge page ranges.

Signed-off-by: Mike Kravetz <mike.krav...@oracle.com>
---
 fs/userfaultfd.c                 | 69 +++++++++++++++++++++++++++++++++++++---
 include/uapi/linux/userfaultfd.h |  3 ++
 2 files changed, 67 insertions(+), 5 deletions(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 2d97952..7a1a345 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -26,6 +26,7 @@
 #include <linux/mempolicy.h>
 #include <linux/ioctl.h>
 #include <linux/security.h>
+#include <linux/hugetlb.h>
 
 static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly;
 
@@ -728,6 +729,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
        struct uffdio_register __user *user_uffdio_register;
        unsigned long vm_flags, new_flags;
        bool found;
+       bool huge_pages;
        unsigned long start, end, vma_end;
 
        user_uffdio_register = (struct uffdio_register __user *) arg;
@@ -779,6 +781,17 @@ static int userfaultfd_register(struct userfaultfd_ctx 
*ctx,
                goto out_unlock;
 
        /*
+        * If the first vma contains huge pages, make sure start address
+        * is aligned to huge page size.
+        */
+       if (vma->vm_flags & VM_HUGETLB) {
+               unsigned long vma_hpagesize = vma_kernel_pagesize(vma);
+
+               if (start & (vma_hpagesize - 1))
+                       goto out_unlock;
+       }
+
+       /*
         * Search for not compatible vmas.
         *
         * FIXME: this shall be relaxed later so that it doesn't fail
@@ -786,6 +799,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
         * on anonymous vmas).
         */
        found = false;
+       huge_pages = false;
        for (cur = vma; cur && cur->vm_start < end; cur = cur->vm_next) {
                cond_resched();
 
@@ -794,7 +808,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 
                /* check not compatible vmas */
                ret = -EINVAL;
-               if (cur->vm_ops)
+               if (cur->vm_ops && !(cur->vm_flags & VM_HUGETLB))
                        goto out_unlock;
 
                /*
@@ -808,6 +822,25 @@ static int userfaultfd_register(struct userfaultfd_ctx 
*ctx,
                    cur->vm_userfaultfd_ctx.ctx != ctx)
                        goto out_unlock;
 
+               /*
+                * Note vmas containing huge pages
+                */
+               if (cur->vm_flags & VM_HUGETLB) {
+                       huge_pages = true;
+
+                       /*
+                        * If vma contains end address, check alignment
+                        */
+                       ret = -EINVAL;
+                       if (end <= cur->vm_end && end > cur->vm_start) {
+                               unsigned long vma_hpagesize =
+                                       vma_kernel_pagesize(cur);
+
+                               if (end & (vma_hpagesize - 1))
+                                       goto out_unlock;
+                       }
+               }
+
                found = true;
        }
        BUG_ON(!found);
@@ -819,7 +852,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
        do {
                cond_resched();
 
-               BUG_ON(vma->vm_ops);
+               BUG_ON(vma->vm_ops && !(vma->vm_flags & VM_HUGETLB));
                BUG_ON(vma->vm_userfaultfd_ctx.ctx &&
                       vma->vm_userfaultfd_ctx.ctx != ctx);
 
@@ -877,7 +910,8 @@ out_unlock:
                 * userland which ioctls methods are guaranteed to
                 * succeed on this range.
                 */
-               if (put_user(UFFD_API_RANGE_IOCTLS,
+               if (put_user(huge_pages ? UFFD_API_RANGE_IOCTLS_HPAGE :
+                            UFFD_API_RANGE_IOCTLS,
                             &user_uffdio_register->ioctls))
                        ret = -EFAULT;
        }
@@ -924,6 +958,17 @@ static int userfaultfd_unregister(struct userfaultfd_ctx 
*ctx,
                goto out_unlock;
 
        /*
+        * If the first vma contains huge pages, make sure start address
+        * is aligned to huge page size.
+        */
+       if (vma->vm_flags & VM_HUGETLB) {
+               unsigned long vma_hpagesize = vma_kernel_pagesize(vma);
+
+               if (start & (vma_hpagesize - 1))
+                       goto out_unlock;
+       }
+
+       /*
         * Search for not compatible vmas.
         *
         * FIXME: this shall be relaxed later so that it doesn't fail
@@ -945,9 +990,23 @@ static int userfaultfd_unregister(struct userfaultfd_ctx 
*ctx,
                 * provides for more strict behavior to notice
                 * unregistration errors.
                 */
-               if (cur->vm_ops)
+               if (cur->vm_ops && !(cur->vm_flags & VM_HUGETLB))
                        goto out_unlock;
 
+               /*
+                * If this vma contains ending address, and huge pages
+                * check alignment.
+                */
+               if (cur->vm_flags & VM_HUGETLB && end <= cur->vm_end &&
+                                                       end > cur->vm_start) {
+                       unsigned long vma_hpagesize = vma_kernel_pagesize(cur);
+
+                       ret = -EINVAL;
+
+                       if (end & (vma_hpagesize - 1))
+                               goto out_unlock;
+               }
+
                found = true;
        }
        BUG_ON(!found);
@@ -959,7 +1018,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx 
*ctx,
        do {
                cond_resched();
 
-               BUG_ON(vma->vm_ops);
+               BUG_ON(vma->vm_ops && !(vma->vm_flags & VM_HUGETLB));
 
                /*
                 * Nothing to do: this vma is already registered into this
diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index 9057d7a..751d814 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -26,6 +26,9 @@
        ((__u64)1 << _UFFDIO_WAKE |             \
         (__u64)1 << _UFFDIO_COPY |             \
         (__u64)1 << _UFFDIO_ZEROPAGE)
+#define UFFD_API_RANGE_IOCTLS_HPAGE            \
+       ((__u64)1 << _UFFDIO_WAKE |             \
+        (__u64)1 << _UFFDIO_COPY)
 
 /*
  * Valid ioctl command number range with this API is from 0x00 to
-- 
2.4.11

Reply via email to