Hi Peter,

On Tue, Mar 19, 2019 at 11:07:22AM +0800, Peter Xu wrote:
> Add a global sysctl knob "vm.unprivileged_userfaultfd" to control
> whether userfaultfd is allowed by unprivileged users.  When this is
> set to zero, only privileged users (root user, or users with the
> CAP_SYS_PTRACE capability) will be able to use the userfaultfd
> syscalls.
> 
> Suggested-by: Andrea Arcangeli <aarca...@redhat.com>
> Suggested-by: Mike Rapoport <r...@linux.vnet.ibm.com>
> Signed-off-by: Peter Xu <pet...@redhat.com>

Reviewed-by: Mike Rapoport <r...@linux.ibm.com>

Just one minor note below:

> ---
>  Documentation/sysctl/vm.txt   | 12 ++++++++++++
>  fs/userfaultfd.c              |  5 +++++
>  include/linux/userfaultfd_k.h |  2 ++
>  kernel/sysctl.c               | 12 ++++++++++++
>  4 files changed, 31 insertions(+)
> 
> diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
> index 187ce4f599a2..f146712f67bb 100644
> --- a/Documentation/sysctl/vm.txt
> +++ b/Documentation/sysctl/vm.txt
> @@ -61,6 +61,7 @@ Currently, these files are in /proc/sys/vm:
>  - stat_refresh
>  - numa_stat
>  - swappiness
> +- unprivileged_userfaultfd
>  - user_reserve_kbytes
>  - vfs_cache_pressure
>  - watermark_boost_factor
> @@ -818,6 +819,17 @@ The default value is 60.
> 
>  ==============================================================
> 
> +unprivileged_userfaultfd
> +
> +This flag controls whether unprivileged users can use the userfaultfd
> +syscalls.  Set this to 1 to allow unprivileged users to use the
> +userfaultfd syscalls, or set this to 0 to restrict userfaultfd to only
> +privileged users (with CAP_SYS_PTRACE capability).

Can you please spell out "system calls" in full here instead of "syscalls"?

> +
> +The default value is 1.
> +
> +==============================================================
> +
>  - user_reserve_kbytes
> 
>  When overcommit_memory is set to 2, "never overcommit" mode, reserve
> diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
> index 89800fc7dc9d..7e856a25cc2f 100644
> --- a/fs/userfaultfd.c
> +++ b/fs/userfaultfd.c
> @@ -30,6 +30,8 @@
>  #include <linux/security.h>
>  #include <linux/hugetlb.h>
> 
> +int sysctl_unprivileged_userfaultfd __read_mostly = 1;
> +
>  static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly;
> 
>  enum userfaultfd_state {
> @@ -1921,6 +1923,9 @@ SYSCALL_DEFINE1(userfaultfd, int, flags)
>       struct userfaultfd_ctx *ctx;
>       int fd;
> 
> +     if (!sysctl_unprivileged_userfaultfd && !capable(CAP_SYS_PTRACE))
> +             return -EPERM;
> +
>       BUG_ON(!current->mm);
> 
>       /* Check the UFFD_* constants for consistency.  */
> diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
> index 37c9eba75c98..ac9d71e24b81 100644
> --- a/include/linux/userfaultfd_k.h
> +++ b/include/linux/userfaultfd_k.h
> @@ -28,6 +28,8 @@
>  #define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)
>  #define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS)
> 
> +extern int sysctl_unprivileged_userfaultfd;
> +
>  extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);
> 
>  extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index 7578e21a711b..9b8ff1881df9 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -66,6 +66,7 @@
>  #include <linux/kexec.h>
>  #include <linux/bpf.h>
>  #include <linux/mount.h>
> +#include <linux/userfaultfd_k.h>
> 
>  #include <linux/uaccess.h>
>  #include <asm/processor.h>
> @@ -1704,6 +1705,17 @@ static struct ctl_table vm_table[] = {
>               .extra1         = (void *)&mmap_rnd_compat_bits_min,
>               .extra2         = (void *)&mmap_rnd_compat_bits_max,
>       },
> +#endif
> +#ifdef CONFIG_USERFAULTFD
> +     {
> +             .procname       = "unprivileged_userfaultfd",
> +             .data           = &sysctl_unprivileged_userfaultfd,
> +             .maxlen         = sizeof(sysctl_unprivileged_userfaultfd),
> +             .mode           = 0644,
> +             .proc_handler   = proc_dointvec_minmax,
> +             .extra1         = &zero,
> +             .extra2         = &one,
> +     },
>  #endif
>       { }
>  };
> -- 
> 2.17.1
> 

-- 
Sincerely yours,
Mike.

Reply via email to