For background to this email, read the "teardown races" section in utrace.txt and Roland's "asynchronous detach" email.
The crash-suspend.c example suffers from the "teardown races" problem. (Since systemtap's utrace code is a more elaborate version of crash-suspend.c, systemtap has the same problem.) So, I've attempted to come up with a solution that doesn't pervert the example too much. I've attached a patch with the details. Basically, at module unload time, instead of detaching directly, it tries to asynchronously stop all the threads we're attached to and then let the quiesce handler detach from the thread. The semi-tricky part was letting the module unload function, exit_crash_suspend(), know when all threads that we had attached to were detached. To do this, the code keeps up with an attach count. When the attach count reaches 0, the module unload function gets woken up to go ahead and exit. I'd appreciate any thoughts, criticisms, etc. on this patch, which I've tested under kernel 2.6.27-0.186.rc0.git15.fc10. -- David Smith [EMAIL PROTECTED] Red Hat http://www.redhat.com 256.217.0141 (direct) 256.837.0057 (fax)
--- /home/dsmith/crash-suspend.c 2008-07-30 15:36:46.000000000 -0500 +++ crash-suspend2.c 2008-07-30 16:01:58.000000000 -0500 @@ -18,6 +18,14 @@ module_param(verbose, bool, 0); UTRACE_EVENT(SIGNAL_CORE) | UTRACE_EVENT(JCTL) | \ UTRACE_EVENT(QUIESCE)) +#define SHUTDOWN_EVENTS (UTRACE_EVENT(QUIESCE) | UTRACE_EVENT(DEATH)) +#define CS_STARTING 0 +#define CS_STOPPING 1 +atomic_t state = ATOMIC_INIT(CS_STARTING); +atomic_t attach_count = ATOMIC_INIT (0); + +static DECLARE_WAIT_QUEUE_HEAD(crash_suspend_wq); + /* * This is the interesting hook. */ @@ -61,6 +69,13 @@ crash_suspend_quiesce(u32 action, struct */ if (!event) engine->data = NULL; + + if (atomic_read(&state) == CS_STOPPING) { + if (atomic_dec_return(&attach_count) <= 0) { + wake_up(&crash_suspend_wq); + } + return UTRACE_DETACH; + } return UTRACE_RESUME; } @@ -85,7 +100,7 @@ crash_suspend_jctl(enum utrace_resume_ac * proper weirdo status, stop the rest of the * process group too in normal job control fashion. */ - (void) kill_pgrp(find_pid(-pgid), SIGTTOU, 1); + (void) kill_pgrp(find_vpid(-pgid), SIGTTOU, 1); } else if (engine->data) { /* * We've been resumed after a crash. @@ -117,6 +132,7 @@ crash_suspend_clone(enum utrace_resume_a } else { int err = utrace_set_events(child, child_engine, MY_EVENTS); WARN_ON(err); + atomic_inc(&attach_count); } return UTRACE_RESUME; @@ -131,13 +147,21 @@ crash_suspend_death(struct utrace_attach struct task_struct *tsk, bool group_dead, int signal) { + if (atomic_read(&state) == CS_STOPPING) { + if (atomic_dec_return(&attach_count) <= 0) { + wake_up(&crash_suspend_wq); + } + } + else { + atomic_dec(&attach_count); + } return UTRACE_DETACH; } - static const struct utrace_engine_ops crash_suspend_ops = { .report_clone = crash_suspend_clone, + .report_quiesce = crash_suspend_quiesce, .report_death = crash_suspend_death, .report_signal = crash_suspend_signal, .report_jctl = crash_suspend_jctl, @@ -151,7 +175,7 @@ static int __init init_crash_suspend(voi int ret; rcu_read_lock(); - target = find_task_by_pid(target_pid); + target = find_task_by_vpid(target_pid); if (target) get_task_struct(target); rcu_read_unlock(); @@ -173,8 +197,10 @@ static int __init init_crash_suspend(voi ret = utrace_set_events(target, engine, MY_EVENTS); if (ret == -ESRCH) printk("pid %d died during setup\n", target->pid); - else + else { + atomic_inc(&attach_count); WARN_ON(ret); + } WARN_ON(atomic_dec_and_test(&target->usage)); return 0; @@ -186,6 +212,7 @@ static void __exit exit_crash_suspend(vo struct utrace_attached_engine *engine; int n = 0; + atomic_set(&state, CS_STOPPING); rcu_read_lock(); for_each_process(t) { engine = utrace_attach(t, UTRACE_ATTACH_MATCH_OPS, @@ -197,14 +224,33 @@ static void __exit exit_crash_suspend(vo error, t->pid); } else { - int ret = utrace_control(t, engine, UTRACE_DETACH); + int ret = utrace_set_events(t, engine, + SHUTDOWN_EVENTS); WARN_ON(ret); + + ret = utrace_control(t, engine, UTRACE_STOP); + if (ret == -EINPROGRESS) { + ret = utrace_control(t, engine, + UTRACE_INTERRUPT); + WARN_ON(ret); + } + else { + WARN_ON(ret); + } ++n; } } rcu_read_unlock(); - printk("detached from %d threads, unloading\n", n); + printk(KERN_INFO "requested %d threads to stop\n", n); + + if (wait_event_timeout(crash_suspend_wq, + (atomic_read(&attach_count) <= 0), + 60 * HZ) <= 0) { + printk(KERN_ERR "gave up waiting.\n"); + } + printk(KERN_INFO "unloading, %d threads left to stop\n", + atomic_read(&attach_count)); } module_init(init_crash_suspend);