On Thu, 4 Jan 2007 17:29:36 +0300
Oleg Nesterov <[EMAIL PROTECTED]> wrote:

> > In brief:
> > 
> > keventd thread                                      hotplug thread
> > --------------                                      --------------
> > 
> >   run_workqueue()
> >     |
> >      work_fn()
> >      |
> >     flush_workqueue()
> >          |  
> >        flush_cpu_workqueue
> >             |                               cpu_down()
> >          mutex_unlock(wq_mutex);                 |
> >     (above opens window for hotplug)           mutex_lock(wq_mutex);
> >                     |                                  /* bring down cpu */ 
> >          wait_for_completion();                  notifier(CPU_DEAD, ..)
> >             |                                      workqueue_cpu_callback
> >             |                                       cleanup_workqueue_thread
> >             |                                         kthread_stop()
> >             |
> >             |
> >          mutex_lock(wq_mutex); <- Can deadlock
> > 
> > 
> > The kthread_stop() will wait for the keventd thread to exit, but keventd
> > is blocked on mutex_lock(wq_mutex), leading to a deadlock.

This?


--- a/kernel/workqueue.c~flush_workqueue-use-preempt_disable-to-hold-off-cpu-hotplug
+++ a/kernel/workqueue.c
@@ -419,18 +419,22 @@ static void flush_cpu_workqueue(struct c
                 * Probably keventd trying to flush its own queue. So simply run
                 * it by hand rather than deadlocking.
                 */
-               mutex_unlock(&workqueue_mutex);
+               preempt_enable();
+               /*
+                * We can still touch *cwq here because we are keventd, and
+                * hot-unplug will be waiting for us to exit.
+                */
                run_workqueue(cwq);
-               mutex_lock(&workqueue_mutex);
+               preempt_disable();
        } else {
                struct wq_barrier barr;
 
                init_wq_barrier(&barr);
                __queue_work(cwq, &barr.work);
 
-               mutex_unlock(&workqueue_mutex);
+               preempt_enable();       /* Can no longer touch *cwq */
                wait_for_completion(&barr.done);
-               mutex_lock(&workqueue_mutex);
+               preempt_disable();
        }
 }
 
@@ -449,7 +453,7 @@ static void flush_cpu_workqueue(struct c
  */
 void fastcall flush_workqueue(struct workqueue_struct *wq)
 {
-       mutex_lock(&workqueue_mutex);
+       preempt_disable();              /* CPU hotplug */
        if (is_single_threaded(wq)) {
                /* Always use first cpu's area. */
                flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, singlethread_cpu));
@@ -459,7 +463,7 @@ void fastcall flush_workqueue(struct wor
                for_each_online_cpu(cpu)
                        flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
        }
-       mutex_unlock(&workqueue_mutex);
+       preempt_enable();
 }
 EXPORT_SYMBOL_GPL(flush_workqueue);
 
_

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to