Allow UFFDIO_REGISTER_MODE_MINOR on anonymous VMAs when the UFFD_FEATURE_MINOR_ANON feature is enabled.
Replace the bool wp_async parameter in vma_can_userfault() and userfaultfd_register_range() with an extensible ctx_flags bitmap. Add UFFD_CTX_WP_ASYNC and UFFD_CTX_MINOR_ANON flags, and userfaultfd_ctx_flags() to build the bitmap from ctx->features. Add userfaultfd_minor_async() helper for checking async minor mode from the fault path. Gate UFFD_FEATURE_MINOR_ANON and UFFD_FEATURE_MINOR_ASYNC on CONFIG_HAVE_ARCH_USERFAULTFD_MINOR. Validate that MINOR_ASYNC requires at least one minor feature. Not yet visible to userspace (not in UFFD_API_FEATURES). Signed-off-by: Kiryl Shutsemau (Meta) <[email protected]> Assisted-by: Claude:claude-opus-4-6 --- fs/userfaultfd.c | 49 ++++++++++++++++++++++++++++++----- include/linux/userfaultfd_k.h | 19 +++++++++++--- mm/userfaultfd.c | 4 +-- 3 files changed, 59 insertions(+), 13 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index bdc84e5219cd..8d508ad19e89 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -89,6 +89,27 @@ static bool userfaultfd_wp_async_ctx(struct userfaultfd_ctx *ctx) return ctx && (ctx->features & UFFD_FEATURE_WP_ASYNC); } +static bool userfaultfd_minor_anon_ctx(struct userfaultfd_ctx *ctx) +{ + return ctx && (ctx->features & UFFD_FEATURE_MINOR_ANON); +} + +static bool userfaultfd_minor_async_ctx(struct userfaultfd_ctx *ctx) +{ + return ctx && (ctx->features & UFFD_FEATURE_MINOR_ASYNC); +} + +static unsigned int userfaultfd_ctx_flags(struct userfaultfd_ctx *ctx) +{ + unsigned int flags = 0; + + if (userfaultfd_wp_async_ctx(ctx)) + flags |= UFFD_CTX_WP_ASYNC; + if (userfaultfd_minor_anon_ctx(ctx)) + flags |= UFFD_CTX_MINOR_ANON; + return flags; +} + /* * Whether WP_UNPOPULATED is enabled on the uffd context. It is only * meaningful when userfaultfd_wp()==true on the vma and when it's @@ -1271,7 +1292,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, bool basic_ioctls; unsigned long start, end; struct vma_iterator vmi; - bool wp_async = userfaultfd_wp_async_ctx(ctx); + unsigned int ctx_flags = userfaultfd_ctx_flags(ctx); user_uffdio_register = (struct uffdio_register __user *) arg; @@ -1345,7 +1366,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, /* check not compatible vmas */ ret = -EINVAL; - if (!vma_can_userfault(cur, vm_flags, wp_async)) + if (!vma_can_userfault(cur, vm_flags, ctx_flags)) goto out_unlock; /* @@ -1398,7 +1419,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, VM_WARN_ON_ONCE(!found); ret = userfaultfd_register_range(ctx, vma, vm_flags, start, end, - wp_async); + ctx_flags); out_unlock: mmap_write_unlock(mm); @@ -1443,7 +1464,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, unsigned long start, end, vma_end; const void __user *buf = (void __user *)arg; struct vma_iterator vmi; - bool wp_async = userfaultfd_wp_async_ctx(ctx); + unsigned int ctx_flags = userfaultfd_ctx_flags(ctx); ret = -EFAULT; if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister))) @@ -1505,7 +1526,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, * provides for more strict behavior to notice * unregistration errors. */ - if (!vma_can_userfault(cur, cur->vm_flags, wp_async)) + if (!vma_can_userfault(cur, cur->vm_flags, ctx_flags)) goto out_unlock; found = true; @@ -1526,7 +1547,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, goto skip; VM_WARN_ON_ONCE(vma->vm_userfaultfd_ctx.ctx != ctx); - VM_WARN_ON_ONCE(!vma_can_userfault(vma, vma->vm_flags, wp_async)); + VM_WARN_ON_ONCE(!vma_can_userfault(vma, vma->vm_flags, ctx_flags)); VM_WARN_ON_ONCE(!(vma->vm_flags & VM_MAYWRITE)); if (vma->vm_start > start) @@ -1890,6 +1911,11 @@ bool userfaultfd_wp_async(struct vm_area_struct *vma) return userfaultfd_wp_async_ctx(vma->vm_userfaultfd_ctx.ctx); } +bool userfaultfd_minor_async(struct vm_area_struct *vma) +{ + return userfaultfd_minor_async_ctx(vma->vm_userfaultfd_ctx.ctx); +} + static inline unsigned int uffd_ctx_features(__u64 user_features) { /* @@ -1993,11 +2019,20 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx, if (features & UFFD_FEATURE_WP_ASYNC) features |= UFFD_FEATURE_WP_UNPOPULATED; + ret = -EINVAL; + /* MINOR_ASYNC requires at least one minor feature */ + if ((features & UFFD_FEATURE_MINOR_ASYNC) && + !(features & (UFFD_FEATURE_MINOR_ANON | + UFFD_FEATURE_MINOR_HUGETLBFS | + UFFD_FEATURE_MINOR_SHMEM))) + goto err_out; + /* report all available features and ioctls to userland */ uffdio_api.features = UFFD_API_FEATURES; #ifndef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR uffdio_api.features &= - ~(UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM); + ~(UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM | + UFFD_FEATURE_MINOR_ANON | UFFD_FEATURE_MINOR_ASYNC); #endif if (!pgtable_supports_uffd_wp()) uffdio_api.features &= ~UFFD_FEATURE_PAGEFAULT_FLAG_WP; diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index fd5f42765497..d1d4ed4a08b0 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -208,9 +208,13 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma) return vma->vm_flags & __VM_UFFD_FLAGS; } +/* Flags for vma_can_userfault() describing uffd context capabilities */ +#define UFFD_CTX_WP_ASYNC (1 << 0) +#define UFFD_CTX_MINOR_ANON (1 << 1) + static inline bool vma_can_userfault(struct vm_area_struct *vma, vm_flags_t vm_flags, - bool wp_async) + unsigned int ctx_flags) { vm_flags &= __VM_UFFD_FLAGS; @@ -218,14 +222,15 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma, return false; if ((vm_flags & VM_UFFD_MINOR) && - (!is_vm_hugetlb_page(vma) && !vma_is_shmem(vma))) + !is_vm_hugetlb_page(vma) && !vma_is_shmem(vma) && + !(vma_is_anonymous(vma) && (ctx_flags & UFFD_CTX_MINOR_ANON))) return false; /* * If wp async enabled, and WP is the only mode enabled, allow any * memory type. */ - if (wp_async && (vm_flags == VM_UFFD_WP)) + if ((ctx_flags & UFFD_CTX_WP_ASYNC) && (vm_flags == VM_UFFD_WP)) return true; /* @@ -270,6 +275,7 @@ extern void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf); extern bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma); extern bool userfaultfd_wp_async(struct vm_area_struct *vma); +extern bool userfaultfd_minor_async(struct vm_area_struct *vma); void userfaultfd_reset_ctx(struct vm_area_struct *vma); @@ -283,7 +289,7 @@ int userfaultfd_register_range(struct userfaultfd_ctx *ctx, struct vm_area_struct *vma, vm_flags_t vm_flags, unsigned long start, unsigned long end, - bool wp_async); + unsigned int ctx_flags); void userfaultfd_release_new(struct userfaultfd_ctx *ctx); @@ -446,6 +452,11 @@ static inline bool userfaultfd_wp_async(struct vm_area_struct *vma) return false; } +static inline bool userfaultfd_minor_async(struct vm_area_struct *vma) +{ + return false; +} + static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma) { return false; diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 927086bb4a3c..dba1ea26fdfe 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -2008,7 +2008,7 @@ int userfaultfd_register_range(struct userfaultfd_ctx *ctx, struct vm_area_struct *vma, vm_flags_t vm_flags, unsigned long start, unsigned long end, - bool wp_async) + unsigned int ctx_flags) { VMA_ITERATOR(vmi, ctx->mm, start); struct vm_area_struct *prev = vma_prev(&vmi); @@ -2021,7 +2021,7 @@ int userfaultfd_register_range(struct userfaultfd_ctx *ctx, for_each_vma_range(vmi, vma, end) { cond_resched(); - VM_WARN_ON_ONCE(!vma_can_userfault(vma, vm_flags, wp_async)); + VM_WARN_ON_ONCE(!vma_can_userfault(vma, vm_flags, ctx_flags)); VM_WARN_ON_ONCE(vma->vm_userfaultfd_ctx.ctx && vma->vm_userfaultfd_ctx.ctx != ctx); VM_WARN_ON_ONCE(!(vma->vm_flags & VM_MAYWRITE)); -- 2.51.2

