This patchset converts inotify to using the newly introduced
per-userns sysctl infrastructure.

Currently the inotify instances/watches are being accounted in the
user_struct structure. This means that in setups where multiple
users in unprivileged containers map to the same underlying
real user (i.e. pointing to the same user_struct) the inotify limits
are going to be shared as well, allowing one user(or application) to exhaust
all others limits.

Fix this by switching the inotify sysctls to using the
per-namespace/per-user limits. This will allow the server admin to
set sensible global limits, which can further be tuned inside every
individual user namespace.

Signed-off-by: Nikolay Borisov <[email protected]>
---
Hello Eric, 

I saw you've finally sent your pull request for 4.9 and it 
includes your implementatino of the ucount infrastructure. So 
here is my respin of the inotify patches using that.

 fs/notify/inotify/inotify.h          | 17 +++++++++++++
 fs/notify/inotify/inotify_fsnotify.c |  6 ++---
 fs/notify/inotify/inotify_user.c     | 46 ++++++++++++------------------------
 include/linux/fsnotify_backend.h     |  3 ++-
 include/linux/sched.h                |  4 ----
 include/linux/user_namespace.h       |  4 ++++
 kernel/ucount.c                      |  6 ++++-
 7 files changed, 45 insertions(+), 41 deletions(-)

diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
index ed855ef6f077..b5536f8ad3e0 100644
--- a/fs/notify/inotify/inotify.h
+++ b/fs/notify/inotify/inotify.h
@@ -30,3 +30,20 @@ extern int inotify_handle_event(struct fsnotify_group *group,
                                const unsigned char *file_name, u32 cookie);
 
 extern const struct fsnotify_ops inotify_fsnotify_ops;
+
+#ifdef CONFIG_INOTIFY_USER
+static void dec_inotify_instances(struct ucounts *ucounts)
+{
+       dec_ucount(ucounts, UCOUNT_INOTIFY_INSTANCES);
+}
+
+static struct ucounts *inc_inotify_watches(struct ucounts *ucounts)
+{
+       return inc_ucount(ucounts->ns, ucounts->uid, UCOUNT_INOTIFY_WATCHES);
+}
+
+static void dec_inotify_watches(struct ucounts *ucounts)
+{
+       dec_ucount(ucounts, UCOUNT_INOTIFY_WATCHES);
+}
+#endif
diff --git a/fs/notify/inotify/inotify_fsnotify.c 
b/fs/notify/inotify/inotify_fsnotify.c
index 2cd900c2c737..1e6b3cfc2cfd 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -165,10 +165,8 @@ static void inotify_free_group_priv(struct fsnotify_group 
*group)
        /* ideally the idr is empty and we won't hit the BUG in the callback */
        idr_for_each(&group->inotify_data.idr, idr_callback, group);
        idr_destroy(&group->inotify_data.idr);
-       if (group->inotify_data.user) {
-               atomic_dec(&group->inotify_data.user->inotify_devs);
-               free_uid(group->inotify_data.user);
-       }
+       if (group->inotify_data.ucounts)
+               dec_inotify_instances(group->inotify_data.ucounts);
 }
 
 static void inotify_free_event(struct fsnotify_event *fsn_event)
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index b8d08d0d0a4d..cfe48bb511c6 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -44,10 +44,8 @@
 
 #include <asm/ioctls.h>
 
-/* these are configurable via /proc/sys/fs/inotify/ */
-static int inotify_max_user_instances __read_mostly;
+/* configurable via /proc/sys/fs/inotify/ */
 static int inotify_max_queued_events __read_mostly;
-static int inotify_max_user_watches __read_mostly;
 
 static struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
 
@@ -59,22 +57,6 @@ static int zero;
 
 struct ctl_table inotify_table[] = {
        {
-               .procname       = "max_user_instances",
-               .data           = &inotify_max_user_instances,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &zero,
-       },
-       {
-               .procname       = "max_user_watches",
-               .data           = &inotify_max_user_watches,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &zero,
-       },
-       {
                .procname       = "max_queued_events",
                .data           = &inotify_max_queued_events,
                .maxlen         = sizeof(int),
@@ -500,7 +482,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark 
*fsn_mark,
        /* remove this mark from the idr */
        inotify_remove_from_idr(group, i_mark);
 
-       atomic_dec(&group->inotify_data.user->inotify_watches);
+       dec_inotify_watches(group->inotify_data.ucounts);
 }
 
 /* ding dong the mark is dead */
@@ -584,14 +566,17 @@ static int inotify_new_watch(struct fsnotify_group *group,
        tmp_i_mark->fsn_mark.mask = mask;
        tmp_i_mark->wd = -1;
 
-       ret = -ENOSPC;
-       if (atomic_read(&group->inotify_data.user->inotify_watches) >= 
inotify_max_user_watches)
-               goto out_err;
-
        ret = inotify_add_to_idr(idr, idr_lock, tmp_i_mark);
        if (ret)
                goto out_err;
 
+       /* increment the number of watches the user has */
+       if (!inc_inotify_watches(group->inotify_data.ucounts)) {
+               inotify_remove_from_idr(group, tmp_i_mark);
+               ret = -ENOSPC;
+               goto out_err;
+       }
+
        /* we are on the idr, now get on the inode */
        ret = fsnotify_add_mark_locked(&tmp_i_mark->fsn_mark, group, inode,
                                       NULL, 0);
@@ -601,8 +586,6 @@ static int inotify_new_watch(struct fsnotify_group *group,
                goto out_err;
        }
 
-       /* increment the number of watches the user has */
-       atomic_inc(&group->inotify_data.user->inotify_watches);
 
        /* return the watch descriptor for this new mark */
        ret = tmp_i_mark->wd;
@@ -653,10 +636,11 @@ static struct fsnotify_group *inotify_new_group(unsigned 
int max_events)
 
        spin_lock_init(&group->inotify_data.idr_lock);
        idr_init(&group->inotify_data.idr);
-       group->inotify_data.user = get_current_user();
+       group->inotify_data.ucounts = inc_ucount(current_user_ns(),
+                                                current_euid(),
+                                                UCOUNT_INOTIFY_INSTANCES);
 
-       if (atomic_inc_return(&group->inotify_data.user->inotify_devs) >
-           inotify_max_user_instances) {
+       if (!group->inotify_data.ucounts) {
                fsnotify_destroy_group(group);
                return ERR_PTR(-EMFILE);
        }
@@ -819,8 +803,8 @@ static int __init inotify_user_setup(void)
        inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC);
 
        inotify_max_queued_events = 16384;
-       inotify_max_user_instances = 128;
-       inotify_max_user_watches = 8192;
+       init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128;
+       init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = 8192;
 
        return 0;
 }
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 58205f33af02..892959de1162 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -16,6 +16,7 @@
 #include <linux/spinlock.h>
 #include <linux/types.h>
 #include <linux/atomic.h>
+#include <linux/user_namespace.h>
 
 /*
  * IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily
@@ -169,7 +170,7 @@ struct fsnotify_group {
                struct inotify_group_private_data {
                        spinlock_t      idr_lock;
                        struct idr      idr;
-                       struct user_struct      *user;
+                       struct ucounts *ucounts;
                } inotify_data;
 #endif
 #ifdef CONFIG_FANOTIFY
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 62c68e513e39..35230a2c73ac 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -840,10 +840,6 @@ struct user_struct {
        atomic_t __count;       /* reference count */
        atomic_t processes;     /* How many processes does this user have? */
        atomic_t sigpending;    /* How many pending signals does this user 
have? */
-#ifdef CONFIG_INOTIFY_USER
-       atomic_t inotify_watches; /* How many inotify watches does this user 
have? */
-       atomic_t inotify_devs;  /* How many inotify devs does this user have 
opened? */
-#endif
 #ifdef CONFIG_FANOTIFY
        atomic_t fanotify_listeners;
 #endif
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index eb209d4523f5..cbcac7a5ec41 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -32,6 +32,10 @@ enum ucount_type {
        UCOUNT_NET_NAMESPACES,
        UCOUNT_MNT_NAMESPACES,
        UCOUNT_CGROUP_NAMESPACES,
+#ifdef CONFIG_INOTIFY_USER 
+       UCOUNT_INOTIFY_INSTANCES,
+       UCOUNT_INOTIFY_WATCHES,
+#endif
        UCOUNT_COUNTS,
 };
 
diff --git a/kernel/ucount.c b/kernel/ucount.c
index 9d20d5dd298a..a6bcea47306b 100644
--- a/kernel/ucount.c
+++ b/kernel/ucount.c
@@ -57,7 +57,7 @@ static struct ctl_table_root set_root = {
 
 static int zero = 0;
 static int int_max = INT_MAX;
-#define UCOUNT_ENTRY(name)                             \
+#define UCOUNT_ENTRY(name)                             \
        {                                               \
                .procname       = name,                 \
                .maxlen         = sizeof(int),          \
@@ -74,6 +74,10 @@ static struct ctl_table user_table[] = {
        UCOUNT_ENTRY("max_net_namespaces"),
        UCOUNT_ENTRY("max_mnt_namespaces"),
        UCOUNT_ENTRY("max_cgroup_namespaces"),
+#ifdef CONFIG_INOTIFY_USER_
+       UCOUNT_ENTRY("max_inotify_instances"),
+       UCOUNT_ENTRY("max_inotify_watches"),
+#endif
        { }
 };
 #endif /* CONFIG_SYSCTL */
-- 
2.5.0

Reply via email to