There is a race between cpu hotplug handling and adding/deleting
gendisk for blk-mq, where both are trying to register and unregister
the same sysfs entries.

cpu hotplug handling for blk-mq (blk_mq_queue_reinit) does unregister
and register sysfs entries of hctx for all request queues in
all_q_list.  On the other hand, these entries for a request queue may
have already been unregistered when cpu hotplug event occurs.  Because
those sysfs entries are unregistered by blk_mq_unregister_disk()
firstly, and then a request queue is deleted from all_q_list by
blk_mq_free_queue().  So there is a race window that cpu hotplug must
not occur.

Fix it by serializing sysfs registration/unregistration with using
per hardware queue mutex and BLK_MQ_F_SYSFS_UP flag.

Signed-off-by: Akinobu Mita <akinobu.m...@gmail.com>
Cc: Jens Axboe <ax...@kernel.dk>
---
 block/blk-mq-sysfs.c   | 19 +++++++++++++++++--
 block/blk-mq.c         |  1 +
 include/linux/blk-mq.h |  1 +
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index b79685e..d8ef3a3 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -371,7 +371,10 @@ void blk_mq_unregister_disk(struct gendisk *disk)
        int i, j;
 
        queue_for_each_hw_ctx(q, hctx, i) {
+               mutex_lock(&hctx->sysfs_up_lock);
                blk_mq_unregister_hctx(hctx);
+               hctx->flags &= ~BLK_MQ_F_SYSFS_UP;
+               mutex_unlock(&hctx->sysfs_up_lock);
 
                hctx_for_each_ctx(hctx, ctx, j)
                        kobject_put(&ctx->kobj);
@@ -423,10 +426,17 @@ int blk_mq_register_disk(struct gendisk *disk)
        kobject_uevent(&q->mq_kobj, KOBJ_ADD);
 
        queue_for_each_hw_ctx(q, hctx, i) {
+               mutex_lock(&hctx->sysfs_up_lock);
+
                hctx->flags |= BLK_MQ_F_SYSFS_UP;
                ret = blk_mq_register_hctx(hctx);
-               if (ret)
+
+               if (ret) {
+                       hctx->flags &= ~BLK_MQ_F_SYSFS_UP;
+                       mutex_unlock(&hctx->sysfs_up_lock);
                        break;
+               }
+               mutex_unlock(&hctx->sysfs_up_lock);
        }
 
        if (ret) {
@@ -443,8 +453,11 @@ void blk_mq_sysfs_unregister(struct request_queue *q)
        struct blk_mq_hw_ctx *hctx;
        int i;
 
-       queue_for_each_hw_ctx(q, hctx, i)
+       queue_for_each_hw_ctx(q, hctx, i) {
+               mutex_lock(&hctx->sysfs_up_lock);
                blk_mq_unregister_hctx(hctx);
+               mutex_unlock(&hctx->sysfs_up_lock);
+       }
 }
 
 int blk_mq_sysfs_register(struct request_queue *q)
@@ -453,7 +466,9 @@ int blk_mq_sysfs_register(struct request_queue *q)
        int i, ret = 0;
 
        queue_for_each_hw_ctx(q, hctx, i) {
+               mutex_lock(&hctx->sysfs_up_lock);
                ret = blk_mq_register_hctx(hctx);
+               mutex_unlock(&hctx->sysfs_up_lock);
                if (ret)
                        break;
        }
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 594eea0..ec94258 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1660,6 +1660,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
        INIT_DELAYED_WORK(&hctx->delay_work, blk_mq_delay_work_fn);
        spin_lock_init(&hctx->lock);
        INIT_LIST_HEAD(&hctx->dispatch);
+       mutex_init(&hctx->sysfs_up_lock);
        hctx->queue = q;
        hctx->queue_num = hctx_idx;
        hctx->flags = set->flags;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 2056a99..78cd4f3 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -59,6 +59,7 @@ struct blk_mq_hw_ctx {
 
        struct blk_mq_cpu_notifier      cpu_notifier;
        struct kobject          kobj;
+       struct mutex            sysfs_up_lock;
 };
 
 struct blk_mq_tag_set {
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to