On Sat, Dec 22, 2018 at 10:18:06PM +0100, Christian Brauner wrote:
> Since binderfs can be mounted by userns root in non-initial user namespaces
> some precautions are in order. First, a way to set a maximum on the number
> of binder devices that can be allocated per binderfs instance and second, a
> way to reserve a reasonable chunk of binderfs devices for the initial ipc
> namespace.
> A first approach as seen in [1] used sysctls similiar to devpts but was
> shown to be flawed (cf. [2] and [3]) since some aspects were unneeded. This
> is an alternative approach which avoids sysctls completely and instead
> switches to a single mount option.
> 
> Starting with this commit binderfs instances can be mounted with a limit on
> the number of binder devices that can be allocated. The max=<count> mount
> option serves as a per-instance limit. If max=<count> is set then only
> <count> number of binder devices can be allocated in this binderfs
> instance.
> Additionally, the binderfs instance in the initial ipc namespace will
> always have a reserve of at least 1024 binder devices unless explicitly
> capped via max=<count>.
> 
> This allows to safely bind-mount binderfs instances into unprivileged user
> namespaces since userns root in a non-initial user namespace cannot change
> the mount option as long as it does not own the mount namespace the
> binderfs mount was created in and hence cannot drain the host of minor
> device numbers
> 
> [1]: https://lore.kernel.org/lkml/20181221133909.18794-1-christ...@brauner.io/
> [2]; https://lore.kernel.org/lkml/20181221163316.ga8...@kroah.com/
> [3]: 
> https://lore.kernel.org/lkml/cahrssex+gdvw4fkkk8oznair9g5icjlyodo8hykv3o0o1jt...@mail.gmail.com/
> [4]: https://lore.kernel.org/lkml/20181221192044.5yvfnuri7gdop...@brauner.io/
> 
> Cc: Todd Kjos <tk...@google.com>
> Cc: Greg Kroah-Hartman <gre...@linuxfoundation.org>
> Signed-off-by: Christian Brauner <christian.brau...@ubuntu.com>
> ---
>  drivers/android/binderfs.c | 109 +++++++++++++++++++++++++++++++++++--
>  1 file changed, 104 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c
> index 7496b10532aa..93aee00994d4 100644
> --- a/drivers/android/binderfs.c
> +++ b/drivers/android/binderfs.c
> @@ -20,6 +20,7 @@
>  #include <linux/parser.h>
>  #include <linux/radix-tree.h>
>  #include <linux/sched.h>
> +#include <linux/seq_file.h>
>  #include <linux/slab.h>
>  #include <linux/spinlock_types.h>
>  #include <linux/stddef.h>
> @@ -39,6 +40,11 @@
>  #define INODE_OFFSET 3
>  #define INTSTRLEN 21
>  #define BINDERFS_MAX_MINOR (1U << MINORBITS)
> +/*
> + * Ensure that the initial ipc namespace always has a good chunk of devices
> + * available.
> + */
> +#define BINDERFS_MAX_MINOR_CAPPED (BINDERFS_MAX_MINOR - 1024)
>  
>  static struct vfsmount *binderfs_mnt;
>  
> @@ -46,6 +52,24 @@ static dev_t binderfs_dev;
>  static DEFINE_MUTEX(binderfs_minors_mutex);
>  static DEFINE_IDA(binderfs_minors);
>  
> +/**
> + * binderfs_mount_opts - mount options for binderfs
> + * @max: maximum number of allocatable binderfs binder devices
> + */
> +struct binderfs_mount_opts {
> +     int max;
> +};
> +
> +enum {
> +     Opt_max,
> +     Opt_err
> +};
> +
> +static const match_table_t tokens = {
> +     { Opt_max, "max=%d" },
> +     { Opt_err, NULL     }
> +};
> +
>  /**
>   * binderfs_info - information about a binderfs mount
>   * @ipc_ns:         The ipc namespace the binderfs mount belongs to.
> @@ -55,13 +79,16 @@ static DEFINE_IDA(binderfs_minors);
>   *                  created.
>   * @root_gid:       gid that needs to be used when a new binder device is
>   *                  created.
> + * @mount_opts:     The mount options in use.
> + * @device_count:   The current number of allocated binder devices.
>   */
>  struct binderfs_info {
>       struct ipc_namespace *ipc_ns;
>       struct dentry *control_dentry;
>       kuid_t root_uid;
>       kgid_t root_gid;
> -
> +     struct binderfs_mount_opts mount_opts;
> +     atomic_t device_count;
>  };
>  
>  static inline struct binderfs_info *BINDERFS_I(const struct inode *inode)
> @@ -107,13 +134,22 @@ static int binderfs_binder_device_create(struct inode 
> *ref_inode,
>       struct inode *inode = NULL;
>       struct super_block *sb = ref_inode->i_sb;
>       struct binderfs_info *info = sb->s_fs_info;
> +     bool use_reserve = (info->ipc_ns == &init_ipc_ns);
>  
>       /* Reserve new minor number for the new device. */
>       mutex_lock(&binderfs_minors_mutex);
> -     minor = ida_alloc_max(&binderfs_minors, BINDERFS_MAX_MINOR, GFP_KERNEL);
> +     if (atomic_inc_return(&info->device_count) < info->mount_opts.max)
> +             minor = ida_alloc_max(&binderfs_minors,
> +                                   use_reserve ? BINDERFS_MAX_MINOR :
> +                                                 BINDERFS_MAX_MINOR_CAPPED,
> +                                   GFP_KERNEL);
> +     else
> +             minor = -ENOSPC;
>       mutex_unlock(&binderfs_minors_mutex);
> -     if (minor < 0)
> +     if (minor < 0) {
> +             atomic_dec(&info->device_count);
>               return minor;
> +     }
>  
>       ret = -ENOMEM;
>       device = kzalloc(sizeof(*device), GFP_KERNEL);
> @@ -187,6 +223,7 @@ static int binderfs_binder_device_create(struct inode 
> *ref_inode,
>       kfree(name);
>       kfree(device);
>       mutex_lock(&binderfs_minors_mutex);
> +     atomic_dec(&info->device_count);
>       ida_free(&binderfs_minors, minor);
>       mutex_unlock(&binderfs_minors_mutex);
>       iput(inode);
> @@ -232,6 +269,7 @@ static long binder_ctl_ioctl(struct file *file, unsigned 
> int cmd,
>  static void binderfs_evict_inode(struct inode *inode)
>  {
>       struct binder_device *device = inode->i_private;
> +     struct binderfs_info *info = BINDERFS_I(inode);
>  
>       clear_inode(inode);
>  
> @@ -239,6 +277,7 @@ static void binderfs_evict_inode(struct inode *inode)
>               return;
>  
>       mutex_lock(&binderfs_minors_mutex);
> +     atomic_dec(&info->device_count);
>       ida_free(&binderfs_minors, device->miscdev.minor);
>       mutex_unlock(&binderfs_minors_mutex);
>  
> @@ -246,9 +285,64 @@ static void binderfs_evict_inode(struct inode *inode)
>       kfree(device);
>  }
>  
> +/**
> + * binderfs_parse_mount_opts - parse binderfs mount options
> + * @data: options to set (can be NULL in which case defaults are used)
> + */
> +static int binderfs_parse_mount_opts(char *data,
> +                                  struct binderfs_mount_opts *opts)
> +{
> +     char *p;
> +     opts->max = BINDERFS_MAX_MINOR;
> +
> +     while ((p = strsep(&data, ",")) != NULL) {
> +             substring_t args[MAX_OPT_ARGS];
> +             int token;
> +             int max_devices;
> +
> +             if (!*p)
> +                     continue;
> +
> +             token = match_token(p, tokens, args);
> +             switch (token) {
> +             case Opt_max:
> +                     max_devices = match_int(&args[0], &max_devices);
> +                     if (max_devices < 0 || (max_devices > 
> BINDERFS_MAX_MINOR))
> +                             return -EINVAL;

One should always account for their own stupid mistakes and here's one
I made: the ways match_int() is used here is nonsense since it
overwrites the parsed max_devices value. This should be:

if (match_int(&args[0], &max_devices) ||
    (max_devices < 0 ||
     (max_devices > BINDERFS_MAX_MINOR)))
        return -EINVAL;

Good that we have to have another version anyway. Sorry about that.

Christian

> +
> +                     opts->max = max_devices;
> +                     break;
> +             default:
> +                     pr_err("Invalid mount options\n");
> +                     return -EINVAL;
> +             }
> +     }
> +
> +     return 0;
> +}
> +
> +static int binderfs_remount(struct super_block *sb, int *flags, char *data)
> +{
> +     struct binderfs_info *info = sb->s_fs_info;
> +     return binderfs_parse_mount_opts(data, &info->mount_opts);
> +}
> +
> +static int binderfs_show_mount_opts(struct seq_file *seq, struct dentry 
> *root)
> +{
> +     struct binderfs_info *info;
> +
> +     info = root->d_sb->s_fs_info;
> +     if (info->mount_opts.max < BINDERFS_MAX_MINOR)
> +             seq_printf(seq, ",max=%d", info->mount_opts.max);
> +
> +     return 0;
> +}
> +
>  static const struct super_operations binderfs_super_ops = {
> -     .statfs = simple_statfs,
> -     .evict_inode = binderfs_evict_inode,
> +     .evict_inode    = binderfs_evict_inode,
> +     .remount_fs     = binderfs_remount,
> +     .show_options   = binderfs_show_mount_opts,
> +     .statfs         = simple_statfs,
>  };
>  
>  static int binderfs_rename(struct inode *old_dir, struct dentry *old_dentry,
> @@ -409,6 +503,10 @@ static int binderfs_fill_super(struct super_block *sb, 
> void *data, int silent)
>       if (!info)
>               goto err_without_dentry;
>  
> +     ret = binderfs_parse_mount_opts(data, &info->mount_opts);
> +     if (ret)
> +             goto err_without_dentry;
> +
>       info->ipc_ns = ipc_ns;
>       info->root_gid = make_kgid(sb->s_user_ns, 0);
>       if (!gid_valid(info->root_gid))
> @@ -416,6 +514,7 @@ static int binderfs_fill_super(struct super_block *sb, 
> void *data, int silent)
>       info->root_uid = make_kuid(sb->s_user_ns, 0);
>       if (!uid_valid(info->root_uid))
>               info->root_uid = GLOBAL_ROOT_UID;
> +     atomic_set(&info->device_count, 0);
>  
>       sb->s_fs_info = info;
>  
> -- 
> 2.19.1
> 
_______________________________________________
devel mailing list
de...@linuxdriverproject.org
http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel

Reply via email to