The plan for manage_workers() is: if it fails to grab manager_mutex
via mutex_trylock(), it will release gcwq->lock and then grab
manager_mutex again.

This plan opens a hole: hotplug can run after we release gcwq->lock,
and it will not handle the binding of the manager. So we add ->manager
to worker_pool and let the hotplug code (gcwq_unbind_fn()) handle it.

Also fix too_many_workers() to use this pointer.

Signed-off-by: Lai Jiangshan <la...@cn.fujitsu.com>
---
 kernel/workqueue.c |   12 ++++++++++--
 1 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 3dd7ce2..b203806 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -165,6 +165,7 @@ struct worker_pool {
        struct timer_list       idle_timer;     /* L: worker idle timeout */
        struct timer_list       mayday_timer;   /* L: SOS timer for workers */
 
+       struct worker           *manager;       /* L: manager worker */
        struct mutex            manager_mutex;  /* mutex manager should hold */
        struct ida              worker_ida;     /* L: for worker IDs */
 };
@@ -680,7 +681,7 @@ static bool need_to_manage_workers(struct worker_pool *pool)
 /* Do we have too many workers and should some go away? */
 static bool too_many_workers(struct worker_pool *pool)
 {
-       bool managing = mutex_is_locked(&pool->manager_mutex);
+       bool managing = !!pool->manager;
        int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
        int nr_busy = pool->nr_workers - nr_idle;
 
@@ -2066,6 +2067,7 @@ static bool manage_workers(struct worker *worker)
        if (!mutex_trylock(&pool->manager_mutex))
                return ret;
 
+       pool->manager = worker;
        pool->flags &= ~POOL_MANAGE_WORKERS;
 
        /*
@@ -2076,6 +2078,8 @@ static bool manage_workers(struct worker *worker)
        ret |= maybe_create_worker(pool);
 
        mutex_unlock(&pool->manager_mutex);
+       pool->manager = NULL;
+
        return ret;
 }
 
@@ -3438,9 +3442,12 @@ static void gcwq_unbind_fn(struct work_struct *work)
         * ones which are still executing works from before the last CPU
         * down must be on the cpu.  After this, they may become diasporas.
         */
-       for_each_worker_pool(pool, gcwq)
+       for_each_worker_pool(pool, gcwq) {
                list_for_each_entry(worker, &pool->idle_list, entry)
                        worker->flags |= WORKER_UNBOUND;
+               if (pool->manager)
+                       pool->manager->flags |= WORKER_UNBOUND;
+       }
 
        for_each_busy_worker(worker, i, pos, gcwq)
                worker->flags |= WORKER_UNBOUND;
@@ -3760,6 +3767,7 @@ static int __init init_workqueues(void)
                        setup_timer(&pool->mayday_timer, gcwq_mayday_timeout,
                                    (unsigned long)pool);
 
+                       pool->manager = NULL;
                        mutex_init(&pool->manager_mutex);
                        ida_init(&pool->worker_ida);
                }
-- 
1.7.4.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to