On 10/11/2016 10:36 AM, Nikolay Borisov wrote:
> This patchset converts inotify to using the newly introduced
> per-userns sysctl infrastructure.
> 
> Currently the inotify instances/watches are being accounted in the
> user_struct structure. This means that in setups where multiple
> users in unprivileged containers map to the same underlying
> real user (i.e. pointing to the same user_struct) the inotify limits
> are going to be shared as well, allowing one user(or application) to exhaust
> all others limits.
> 
> Fix this by switching the inotify sysctls to using the
> per-namespace/per-user limits. This will allow the server admin to
> set sensible global limits, which can further be tuned inside every
> individual user namespace. Additionally, in order to preserve the
> sysctl ABI make the existing inotify instances/watches sysctls
> modify the values of the initial user namespace.
> 
> Signed-off-by: Nikolay Borisov <ker...@kyup.com>
> ---
> 
> So here is a revised version which retains the existing sysctls,
> and hooks them to the init_user_ns values. 

Gentle ping, now that rc1 has shipped and Jan's sysctl concern hopefully
resolved.

> 
>  fs/notify/inotify/inotify.h          | 17 +++++++++++++++++
>  fs/notify/inotify/inotify_fsnotify.c |  6 ++----
>  fs/notify/inotify/inotify_user.c     | 34 +++++++++++++++++-----------------
>  include/linux/fsnotify_backend.h     |  3 ++-
>  include/linux/sched.h                |  4 ----
>  include/linux/user_namespace.h       |  4 ++++
>  kernel/ucount.c                      |  6 +++++-
>  7 files changed, 47 insertions(+), 27 deletions(-)
> 
> diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
> index ed855ef6f077..b5536f8ad3e0 100644
> --- a/fs/notify/inotify/inotify.h
> +++ b/fs/notify/inotify/inotify.h
> @@ -30,3 +30,20 @@ extern int inotify_handle_event(struct fsnotify_group 
> *group,
>                               const unsigned char *file_name, u32 cookie);
>  
>  extern const struct fsnotify_ops inotify_fsnotify_ops;
> +
> +#ifdef CONFIG_INOTIFY_USER
> +static void dec_inotify_instances(struct ucounts *ucounts)
> +{
> +     dec_ucount(ucounts, UCOUNT_INOTIFY_INSTANCES);
> +}
> +
> +static struct ucounts *inc_inotify_watches(struct ucounts *ucounts)
> +{
> +     return inc_ucount(ucounts->ns, ucounts->uid, UCOUNT_INOTIFY_WATCHES);
> +}
> +
> +static void dec_inotify_watches(struct ucounts *ucounts)
> +{
> +     dec_ucount(ucounts, UCOUNT_INOTIFY_WATCHES);
> +}
> +#endif
> diff --git a/fs/notify/inotify/inotify_fsnotify.c 
> b/fs/notify/inotify/inotify_fsnotify.c
> index 2cd900c2c737..1e6b3cfc2cfd 100644
> --- a/fs/notify/inotify/inotify_fsnotify.c
> +++ b/fs/notify/inotify/inotify_fsnotify.c
> @@ -165,10 +165,8 @@ static void inotify_free_group_priv(struct 
> fsnotify_group *group)
>       /* ideally the idr is empty and we won't hit the BUG in the callback */
>       idr_for_each(&group->inotify_data.idr, idr_callback, group);
>       idr_destroy(&group->inotify_data.idr);
> -     if (group->inotify_data.user) {
> -             atomic_dec(&group->inotify_data.user->inotify_devs);
> -             free_uid(group->inotify_data.user);
> -     }
> +     if (group->inotify_data.ucounts)
> +             dec_inotify_instances(group->inotify_data.ucounts);
>  }
>  
>  static void inotify_free_event(struct fsnotify_event *fsn_event)
> diff --git a/fs/notify/inotify/inotify_user.c 
> b/fs/notify/inotify/inotify_user.c
> index b8d08d0d0a4d..7d769a824742 100644
> --- a/fs/notify/inotify/inotify_user.c
> +++ b/fs/notify/inotify/inotify_user.c
> @@ -44,10 +44,8 @@
>  
>  #include <asm/ioctls.h>
>  
> -/* these are configurable via /proc/sys/fs/inotify/ */
> -static int inotify_max_user_instances __read_mostly;
> +/* configurable via /proc/sys/fs/inotify/ */
>  static int inotify_max_queued_events __read_mostly;
> -static int inotify_max_user_watches __read_mostly;
>  
>  static struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
>  
> @@ -60,7 +58,7 @@ static int zero;
>  struct ctl_table inotify_table[] = {
>       {
>               .procname       = "max_user_instances",
> -             .data           = &inotify_max_user_instances,
> +             .data           = 
> &init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES],
>               .maxlen         = sizeof(int),
>               .mode           = 0644,
>               .proc_handler   = proc_dointvec_minmax,
> @@ -68,7 +66,7 @@ struct ctl_table inotify_table[] = {
>       },
>       {
>               .procname       = "max_user_watches",
> -             .data           = &inotify_max_user_watches,
> +             .data           = 
> &init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES],
>               .maxlen         = sizeof(int),
>               .mode           = 0644,
>               .proc_handler   = proc_dointvec_minmax,
> @@ -500,7 +498,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark 
> *fsn_mark,
>       /* remove this mark from the idr */
>       inotify_remove_from_idr(group, i_mark);
>  
> -     atomic_dec(&group->inotify_data.user->inotify_watches);
> +     dec_inotify_watches(group->inotify_data.ucounts);
>  }
>  
>  /* ding dong the mark is dead */
> @@ -584,14 +582,17 @@ static int inotify_new_watch(struct fsnotify_group 
> *group,
>       tmp_i_mark->fsn_mark.mask = mask;
>       tmp_i_mark->wd = -1;
>  
> -     ret = -ENOSPC;
> -     if (atomic_read(&group->inotify_data.user->inotify_watches) >= 
> inotify_max_user_watches)
> -             goto out_err;
> -
>       ret = inotify_add_to_idr(idr, idr_lock, tmp_i_mark);
>       if (ret)
>               goto out_err;
>  
> +     /* increment the number of watches the user has */
> +     if (!inc_inotify_watches(group->inotify_data.ucounts)) {
> +             inotify_remove_from_idr(group, tmp_i_mark);
> +             ret = -ENOSPC;
> +             goto out_err;
> +     }
> +
>       /* we are on the idr, now get on the inode */
>       ret = fsnotify_add_mark_locked(&tmp_i_mark->fsn_mark, group, inode,
>                                      NULL, 0);
> @@ -601,8 +602,6 @@ static int inotify_new_watch(struct fsnotify_group *group,
>               goto out_err;
>       }
>  
> -     /* increment the number of watches the user has */
> -     atomic_inc(&group->inotify_data.user->inotify_watches);
>  
>       /* return the watch descriptor for this new mark */
>       ret = tmp_i_mark->wd;
> @@ -653,10 +652,11 @@ static struct fsnotify_group 
> *inotify_new_group(unsigned int max_events)
>  
>       spin_lock_init(&group->inotify_data.idr_lock);
>       idr_init(&group->inotify_data.idr);
> -     group->inotify_data.user = get_current_user();
> +     group->inotify_data.ucounts = inc_ucount(current_user_ns(),
> +                                              current_euid(),
> +                                              UCOUNT_INOTIFY_INSTANCES);
>  
> -     if (atomic_inc_return(&group->inotify_data.user->inotify_devs) >
> -         inotify_max_user_instances) {
> +     if (!group->inotify_data.ucounts) {
>               fsnotify_destroy_group(group);
>               return ERR_PTR(-EMFILE);
>       }
> @@ -819,8 +819,8 @@ static int __init inotify_user_setup(void)
>       inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC);
>  
>       inotify_max_queued_events = 16384;
> -     inotify_max_user_instances = 128;
> -     inotify_max_user_watches = 8192;
> +     init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128;
> +     init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = 8192;
>  
>       return 0;
>  }
> diff --git a/include/linux/fsnotify_backend.h 
> b/include/linux/fsnotify_backend.h
> index 58205f33af02..892959de1162 100644
> --- a/include/linux/fsnotify_backend.h
> +++ b/include/linux/fsnotify_backend.h
> @@ -16,6 +16,7 @@
>  #include <linux/spinlock.h>
>  #include <linux/types.h>
>  #include <linux/atomic.h>
> +#include <linux/user_namespace.h>
>  
>  /*
>   * IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily
> @@ -169,7 +170,7 @@ struct fsnotify_group {
>               struct inotify_group_private_data {
>                       spinlock_t      idr_lock;
>                       struct idr      idr;
> -                     struct user_struct      *user;
> +                     struct ucounts *ucounts;
>               } inotify_data;
>  #endif
>  #ifdef CONFIG_FANOTIFY
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 62c68e513e39..35230a2c73ac 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -840,10 +840,6 @@ struct user_struct {
>       atomic_t __count;       /* reference count */
>       atomic_t processes;     /* How many processes does this user have? */
>       atomic_t sigpending;    /* How many pending signals does this user 
> have? */
> -#ifdef CONFIG_INOTIFY_USER
> -     atomic_t inotify_watches; /* How many inotify watches does this user 
> have? */
> -     atomic_t inotify_devs;  /* How many inotify devs does this user have 
> opened? */
> -#endif
>  #ifdef CONFIG_FANOTIFY
>       atomic_t fanotify_listeners;
>  #endif
> diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
> index eb209d4523f5..cbcac7a5ec41 100644
> --- a/include/linux/user_namespace.h
> +++ b/include/linux/user_namespace.h
> @@ -32,6 +32,10 @@ enum ucount_type {
>       UCOUNT_NET_NAMESPACES,
>       UCOUNT_MNT_NAMESPACES,
>       UCOUNT_CGROUP_NAMESPACES,
> +#ifdef CONFIG_INOTIFY_USER 
> +     UCOUNT_INOTIFY_INSTANCES,
> +     UCOUNT_INOTIFY_WATCHES,
> +#endif
>       UCOUNT_COUNTS,
>  };
>  
> diff --git a/kernel/ucount.c b/kernel/ucount.c
> index 9d20d5dd298a..a6bcea47306b 100644
> --- a/kernel/ucount.c
> +++ b/kernel/ucount.c
> @@ -57,7 +57,7 @@ static struct ctl_table_root set_root = {
>  
>  static int zero = 0;
>  static int int_max = INT_MAX;
> -#define UCOUNT_ENTRY(name)                           \
> +#define UCOUNT_ENTRY(name)                           \
>       {                                               \
>               .procname       = name,                 \
>               .maxlen         = sizeof(int),          \
> @@ -74,6 +74,10 @@ static struct ctl_table user_table[] = {
>       UCOUNT_ENTRY("max_net_namespaces"),
>       UCOUNT_ENTRY("max_mnt_namespaces"),
>       UCOUNT_ENTRY("max_cgroup_namespaces"),
> +#ifdef CONFIG_INOTIFY_USER_
> +     UCOUNT_ENTRY("max_inotify_instances"),
> +     UCOUNT_ENTRY("max_inotify_watches"),
> +#endif
>       { }
>  };
>  #endif /* CONFIG_SYSCTL */
> 

Reply via email to