https://jira.sw.ru/browse/PSBM-34438
(This fix was adapted from PCS6.) A container can create a large number of mount points, which slows down operations on them. Because some of these operations take global locks (namespace_sem, vfsmount_lock), this can affect other containers as well. Let us limit the maximum number of mount points a VE may create. The limit can be customized via the /proc/sys/fs/ve-mount-nr knob. Changes in v.3: * Revisited VE-specific parts of the patch to reduce the impact on the generic code. Changes in v.2: * The situations where VE0 mounts something and another VE unmounts it seem to be of no concern. If so, it is OK not to alter struct mount: the mount counter for a VE may become unbalanced, but this is acceptable here. * The sysctl knob is now defined alongside other VE sysctls. Signed-off-by: Evgenii Shatokhin <eshatok...@odin.com> --- fs/namespace.c | 9 ++++++++- include/linux/ve.h | 27 +++++++++++++++++++++++++++ kernel/ve/ve.c | 2 ++ kernel/ve/veowner.c | 15 +++++++++++++++ 4 files changed, 52 insertions(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index 8909c13..b4ea5a5 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -165,7 +165,12 @@ unsigned int mnt_get_count(struct mount *mnt) static struct mount *alloc_vfsmnt(const char *name) { - struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); + struct mount *mnt; + + if (!ve_mount_allowed()) + return NULL; + + mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); if (mnt) { int err; @@ -202,6 +207,7 @@ static struct mount *alloc_vfsmnt(const char *name) INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks); #endif } + ve_mount_nr_inc(); return mnt; #ifdef CONFIG_SMP @@ -542,6 +548,7 @@ int sb_prepare_remount_readonly(struct super_block *sb) static void free_vfsmnt(struct mount *mnt) { + ve_mount_nr_dec(); kfree(mnt->mnt_devname); mnt_free_id(mnt); #ifdef CONFIG_SMP diff --git a/include/linux/ve.h b/include/linux/ve.h index 86b95c3..1249102 100644 --- a/include/linux/ve.h +++ b/include/linux/ve.h @@ -128,6 
+128,10 @@ struct ve_struct { unsigned long aio_nr; unsigned long aio_max_nr; #endif + /* Number of mounts. May become unbalanced if VE0 mounts something + * and the VE unmounts it. This is acceptable. + */ + int mnt_nr; }; struct ve_devmnt { @@ -145,6 +149,8 @@ extern int nr_ve; extern struct proc_dir_entry *proc_vz_dir; extern struct cgroup_subsys ve_subsys; +extern unsigned int sysctl_ve_mount_nr; + #ifdef CONFIG_VE_IPTABLES extern __u64 ve_setup_iptables_mask(__u64 init_mask); #endif @@ -222,6 +228,23 @@ extern struct tty_driver *vtty_console_driver(int *index); extern int vtty_open_master(envid_t veid, int idx); #endif /* CONFIG_TTY */ +static inline int ve_mount_allowed(void) +{ + struct ve_struct *ve = get_exec_env(); + + return ve_is_super(ve) || ve->mnt_nr < sysctl_ve_mount_nr; +} + +static inline void ve_mount_nr_inc(void) +{ + get_exec_env()->mnt_nr++; +} + +static inline void ve_mount_nr_dec(void) +{ + get_exec_env()->mnt_nr--; +} + #else /* CONFIG_VE */ #define ve_uevent_seqnum uevent_seqnum @@ -253,6 +276,10 @@ static inline void monotonic_abs_to_ve(clockid_t which_clock, struct timespec *tp) { } static inline void monotonic_ve_to_abs(clockid_t which_clock, struct timepsec *tp) { } + +static inline int ve_mount_allowed(void) { return 1; } +static inline void ve_mount_nr_inc(void) { } +static inline void ve_mount_nr_dec(void) { } #endif /* CONFIG_VE */ #endif /* _LINUX_VE_H */ diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c index e9219e6..ac2babb 100644 --- a/kernel/ve/ve.c +++ b/kernel/ve/ve.c @@ -82,6 +82,7 @@ struct ve_struct ve0 = { #endif .sched_lat_ve.cur = &ve0_lat_stats, .init_cred = &init_cred, + .mnt_nr = 0, }; EXPORT_SYMBOL(ve0); @@ -653,6 +654,7 @@ do_init: ve->aio_nr = 0; ve->aio_max_nr = AIO_MAX_NR_DEFAULT; #endif + ve->mnt_nr = 0; return &ve->css; diff --git a/kernel/ve/veowner.c b/kernel/ve/veowner.c index 316e4d0..1a7e735 100644 --- a/kernel/ve/veowner.c +++ b/kernel/ve/veowner.c @@ -55,6 +55,14 @@ static void prepare_proc(void) int 
ve_xattr_policy = VE_XATTR_POLICY_ACCEPT; static int ve_area_access_check; +/* + * Operations with a big amount of mount points can require a lot of time. + * These operations take the global lock namespace_sem, so they can affect + * other containers. Let us allow no more than sysctl_ve_mount_nr mount + * points for a VE. + */ +unsigned int sysctl_ve_mount_nr = 4096; + static struct ctl_table vz_fs_table[] = { { .procname = "ve-area-access-check", @@ -77,6 +85,13 @@ static struct ctl_table vz_fs_table[] = { .mode = 0644 | S_ISVTX, .proc_handler = &proc_dointvec_virtual, }, + { + .procname = "ve-mount-nr", + .data = &sysctl_ve_mount_nr, + .maxlen = sizeof(sysctl_ve_mount_nr), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { 0 } }; -- 2.3.2 _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel