This patch reintroduces the feature from commit 6d43ed1, which was reverted
due to a scenario where a hanging user-mode-helper in one container could
block the startup of other containers.
Race can be reproduced with steps below:
1) Apply this test patch to increase the race probability:
kernel/umh.c
@@ -455,6 +456,8 @@ int call_usermodehelper_exec_ve(struct ve_struct *ve,
sub_info->queue = call_usermodehelper_queue_ve;
kthread_init_work(&sub_info->ve_work,
call_usermodehelper_exec_work_ve);
+ while (VE_IS_RUNNING(ve))
+ cond_resched();
} else {
sub_info->queue = call_usermodehelper_queue;
INIT_WORK(&sub_info->work, call_usermodehelper_exec_work);
2) Set corepattern to type pipe in CT:
echo "|/bin/dd of=/vz/coredumps/core-%e-%t.%p" > /proc/sys/kernel/core_pattern
3) Generate high CPU load on all cores using tools like stress-ng
stress-ng --futex $(nproc) --timeout 60s
4) Produce a segfault inside a container, and then try to stop the
container by killing init.
The coredump creates a "dd" work item and adds it to ve_umh_worker, which
is already stopped and will never handle this work. The work hangs
forever, and the container never stops:
[<0>] call_usermodehelper_exec+0x168/0x1a0
[<0>] call_usermodehelper_exec_ve+0x96/0xe0
[<0>] do_coredump+0x60f/0xf40
[<0>] get_signal+0x834/0x960
[<0>] arch_do_signal_or_restart+0x29/0xf0
[<0>] irqentry_exit_to_user_mode+0x12e/0x1a0
[<0>] asm_exc_page_fault+0x26/0x30
Fix is:
1) Before queueing work via call_usermodehelper_exec for a ve, check
that the ve is in the running state.
2) Add a separate running-helpers counter for each ve.
3) In ve_stop_ns, after setting the VE_STATE_STOPPING state,
wait for the running helpers count to reach 0 before stopping umh.
There are 2 cases:
If the call_usermodehelper_exec_ve thread reaches call_usermodehelper_exec
after ve_stop_ns has started wait_khelpers, it will already see that
the ve is not in the running state and will not queue the work.
If the call_usermodehelper_exec_ve thread reaches call_usermodehelper_exec
before the VE_STATE_STOPPING state is set for the ve, then ve_stop_ns will
wait in wait_khelpers until all work items have been queued. After that,
all queued work items will be processed in kthread_flush_worker.
https://virtuozzo.atlassian.net/browse/VSTOR-106887
Signed-off-by: Aleksei Oladko <[email protected]>
---
include/linux/umh.h | 2 ++
include/linux/ve.h | 2 ++
kernel/umh.c | 27 ++++++++++++++++++++++++++-
kernel/ve/ve.c | 5 +++++
4 files changed, 35 insertions(+), 1 deletion(-)
diff --git a/include/linux/umh.h b/include/linux/umh.h
index 5647f1e64e39..35fc4023df74 100644
--- a/include/linux/umh.h
+++ b/include/linux/umh.h
@@ -56,6 +56,8 @@ extern int
call_usermodehelper_exec_ve(struct ve_struct *ve,
struct subprocess_info *info, int wait);
+extern void wait_khelpers(struct ve_struct *ve);
+
#else /* CONFIG_VE */
#define call_usermodehelper_ve(ve, ...) \
diff --git a/include/linux/ve.h b/include/linux/ve.h
index e944132f972f..37562dff25aa 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -93,6 +93,8 @@ struct ve_struct {
struct kthread_worker *kthreadd_worker;
struct task_struct *kthreadd_task;
+ atomic_t umh_running_helpers;
+ struct wait_queue_head umh_helpers_waitq;
struct kthread_worker umh_worker;
struct task_struct *umh_task;
diff --git a/kernel/umh.c b/kernel/umh.c
index bd49c108eb90..699403efe382 100644
--- a/kernel/umh.c
+++ b/kernel/umh.c
@@ -447,9 +447,30 @@ static void call_usermodehelper_exec_work_ve(struct
kthread_work *work)
}
}
+static void umh_running_helpers_inc(struct ve_struct *ve)
+{
+ atomic_inc(&ve->umh_running_helpers);
+ smp_mb__after_atomic();
+}
+
+static void umh_running_helpers_dec(struct ve_struct *ve)
+{
+ if (atomic_dec_and_test(&ve->umh_running_helpers))
+ wake_up(&ve->umh_helpers_waitq);
+}
+
+void wait_khelpers(struct ve_struct *ve)
+{
+ wait_event(ve->umh_helpers_waitq,
+ atomic_read(&ve->umh_running_helpers) == 0);
+}
+EXPORT_SYMBOL(wait_khelpers);
+
int call_usermodehelper_exec_ve(struct ve_struct *ve,
struct subprocess_info *sub_info, int wait)
{
+ int ret = -EBUSY;
+
if (!ve_is_super(ve)) {
sub_info->ve = ve;
sub_info->queue = call_usermodehelper_queue_ve;
@@ -460,7 +481,11 @@ int call_usermodehelper_exec_ve(struct ve_struct *ve,
INIT_WORK(&sub_info->work, call_usermodehelper_exec_work);
}
- return call_usermodehelper_exec(sub_info, wait);
+ umh_running_helpers_inc(ve);
+ if (VE_IS_RUNNING(ve))
+ ret = call_usermodehelper_exec(sub_info, wait);
+ umh_running_helpers_dec(ve);
+ return ret;
}
EXPORT_SYMBOL(call_usermodehelper_exec_ve);
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index cb77f7b7e4cd..d5dc15942ab5 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -88,6 +88,8 @@ struct ve_struct ve0 = {
.nd_neigh_nr = ATOMIC_INIT(0),
.mnt_nr = ATOMIC_INIT(0),
.meminfo_val = VE_MEMINFO_SYSTEM,
+ .umh_running_helpers = ATOMIC_INIT(0),
+ .umh_helpers_waitq =
__WAIT_QUEUE_HEAD_INITIALIZER(ve0.umh_helpers_waitq),
.vdso_64 = (struct vdso_image*)&vdso_image_64,
.vdso_32 = (struct vdso_image*)&vdso_image_32,
.release_list_lock = __SPIN_LOCK_UNLOCKED(
@@ -480,6 +482,8 @@ static int ve_start_umh(struct ve_struct *ve)
{
struct task_struct *task;
+ atomic_set(&ve->umh_running_helpers, 0);
+ init_waitqueue_head(&ve->umh_helpers_waitq);
kthread_init_worker(&ve->umh_worker);
task = kthread_create_on_node_ve_flags(ve, 0, kthread_worker_fn,
@@ -814,6 +818,7 @@ void ve_stop_ns(struct pid_namespace *pid_ns)
*/
up_write(&ve->op_sem);
+ wait_khelpers(ve);
/*
* release_agent works on top of umh_worker, so we must make sure, that
* ve workqueue is stopped first.