https://jira.sw.ru/browse/PSBM-34438
(This fix was adapted from PCS6.) It is possible for a container to create lots of mount points, which may make operations with them slower. As some of these operations take global locks (namespace_sem, vfsmount_lock), it might affect other containers as well. Let us limit the maximum number of mount points a VE may create. The limit can be customized via /proc/sys/fs/ve-mount-nr knob. Signed-off-by: Evgenii Shatokhin <eshatok...@odin.com> --- fs/mount.h | 2 ++ fs/namespace.c | 34 ++++++++++++++++++++++++++++++++-- include/linux/mnt_namespace.h | 2 ++ include/linux/ve.h | 1 + include/linux/ve_proto.h | 11 +++++++++++ kernel/sysctl.c | 8 ++++++++ kernel/ve/ve.c | 2 ++ 7 files changed, 58 insertions(+), 2 deletions(-) diff --git a/fs/mount.h b/fs/mount.h index 3e16b57..e192ff9 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -50,6 +50,8 @@ struct mount { int mnt_expiry_mark; /* true if marked for expiry */ int mnt_pinned; int mnt_ghosts; + + envid_t mnt_owner_veid; }; #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */ diff --git a/fs/namespace.c b/fs/namespace.c index 8909c13..be80e8d 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -42,6 +42,14 @@ static struct list_head mountpoint_hashtable[HASH_SIZE]; static struct kmem_cache *mnt_cache __read_mostly; static struct rw_semaphore namespace_sem; +/* + * Operations with a big amount of mount points can require a lot of time. + * These operations take the global lock namespace_sem, so they can affect + * other containers. Let us allow no more than sysctl_ve_mount_nr mount + * points for a VE. + */ +unsigned int sysctl_ve_mount_nr = 4096; + /* /sys/fs */ struct kobject *fs_kobj; EXPORT_SYMBOL_GPL(fs_kobj); @@ -165,7 +173,15 @@ unsigned int mnt_get_count(struct mount *mnt) static struct mount *alloc_vfsmnt(const char *name) { - struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); + struct mount *mnt; + struct ve_struct *ve = get_exec_env(); + + if (ve && + atomic_add_return(1, &ve->mnt_nr) > sysctl_ve_mount_nr && + !ve_is_super(ve)) + goto out_mnt_nr_dec; + + mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); if (mnt) { int err; @@ -190,6 +206,8 @@ static struct mount *alloc_vfsmnt(const char *name) mnt->mnt_writers = 0; #endif + mnt->mnt_owner_veid = get_veid(ve); + INIT_LIST_HEAD(&mnt->mnt_hash); INIT_LIST_HEAD(&mnt->mnt_child); INIT_LIST_HEAD(&mnt->mnt_mounts); @@ -201,7 +219,9 @@ static struct mount *alloc_vfsmnt(const char *name) #ifdef CONFIG_FSNOTIFY INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); #endif - } + } else + goto out_mnt_nr_dec; + return mnt; #ifdef CONFIG_SMP @@ -212,6 +232,9 @@ out_free_id: mnt_free_id(mnt); out_free_cache: kmem_cache_free(mnt_cache, mnt); +out_mnt_nr_dec: + if (ve) + atomic_dec(&ve->mnt_nr); return NULL; } @@ -542,6 +565,13 @@ int sb_prepare_remount_readonly(struct super_block *sb) static void free_vfsmnt(struct mount *mnt) { + struct ve_struct *ve = get_ve_by_id(mnt->mnt_owner_veid); + + if (ve) { + atomic_dec(&ve->mnt_nr); + put_ve(ve); + } + kfree(mnt->mnt_devname); mnt_free_id(mnt); #ifdef CONFIG_SMP diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h index 12b2ab5..4d83d3c 100644 --- a/include/linux/mnt_namespace.h +++ b/include/linux/mnt_namespace.h @@ -14,5 +14,7 @@ extern const struct file_operations proc_mounts_operations; extern const struct file_operations proc_mountinfo_operations; extern const struct file_operations proc_mountstats_operations; +extern unsigned int sysctl_ve_mount_nr; + #endif #endif diff --git a/include/linux/ve.h b/include/linux/ve.h index 86b95c3..c6530c2 100644 --- a/include/linux/ve.h +++ b/include/linux/ve.h @@ -128,6 +128,7 @@ struct ve_struct { unsigned long aio_nr; unsigned long aio_max_nr; #endif + atomic_t mnt_nr; }; struct ve_devmnt { diff --git a/include/linux/ve_proto.h b/include/linux/ve_proto.h index 0f5898e..3894c60 100644 --- a/include/linux/ve_proto.h +++ b/include/linux/ve_proto.h @@ -32,6 +32,7 @@ static inline bool ve_is_super(struct ve_struct *ve) #define get_exec_env() (current->task_ve) #define get_env_init(ve) (ve->ve_ns->pid_ns->child_reaper) +#define get_veid(ve) (ve->veid) const char *ve_name(struct ve_struct *ve); @@ -124,6 +125,11 @@ static inline struct ve_struct *get_exec_env(void) #define get_env_init(ve) (ve->ve_ns->pid_ns->child_reaper) +static inline envid_t get_veid(struct ve_struct *ve) +{ + return 0; +} + static inline bool ve_is_super(struct ve_struct *ve) { return true; @@ -139,6 +145,11 @@ static inline const char *task_ve_name(struct task_struct *task) return "0"; } +static inline struct ve_struct *get_ve_by_id(envid_t veid) +{ + return NULL; +} + #define nr_threads_ve(ve) (nr_threads) #endif /* CONFIG_VE */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 9c081e3..ba69200 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -64,6 +64,7 @@ #include <linux/sched/sysctl.h> #include <linux/kexec.h> #include <linux/ve.h> +#include <linux/mnt_namespace.h> #include <asm/uaccess.h> #include <asm/processor.h> @@ -1740,6 +1741,13 @@ static struct ctl_table fs_table[] = { .proc_handler = &pipe_proc_fn, .extra1 = &pipe_min_size, }, + { + .procname = "ve-mount-nr", + .data = &sysctl_ve_mount_nr, + .maxlen = sizeof(sysctl_ve_mount_nr), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { } }; diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c index e9219e6..216608c 100644 --- a/kernel/ve/ve.c +++ b/kernel/ve/ve.c @@ -82,6 +82,7 @@ struct ve_struct ve0 = { #endif .sched_lat_ve.cur = &ve0_lat_stats, .init_cred = &init_cred, + .mnt_nr = ATOMIC_INIT(0), }; EXPORT_SYMBOL(ve0); @@ -653,6 +654,7 @@ do_init: ve->aio_nr = 0; ve->aio_max_nr = AIO_MAX_NR_DEFAULT; #endif + atomic_set(&ve->mnt_nr, 0); return &ve->css; -- 2.3.2 _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel