Jens Axboe <ax...@kernel.dk> writes: > On 10/22/2015 09:53 AM, Jeff Moyer wrote: >> I think that percolating BLK_MQ_F_TAG_SHARED up to the tag set would >> allow newly created hctxs to simply inherit the shared state (in >> blk_mq_init_hctx), and you won't need to freeze every queue in order to >> guarantee that. >> >> I was writing a patch to that effect. I've now stopped as I want to >> make sure I'm not off in the weeds. :) > > If that is where the delay is done, then yes, that should fix it and > be a trivial patch.
It's not quite as trivial as I had hoped. Jason, can you give the attached patch a try? All I've done is boot tested it so far. Thanks! Jeff diff --git a/block/blk-mq.c b/block/blk-mq.c index 7785ae9..8b4c484 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1860,27 +1860,26 @@ static void blk_mq_map_swqueue(struct request_queue *q, } } -static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set) +static void queue_set_hctx_shared(struct request_queue *q, bool shared) { struct blk_mq_hw_ctx *hctx; - struct request_queue *q; - bool shared; int i; - if (set->tag_list.next == set->tag_list.prev) - shared = false; - else - shared = true; + queue_for_each_hw_ctx(q, hctx, i) { + if (shared) + hctx->flags |= BLK_MQ_F_TAG_SHARED; + else + hctx->flags &= ~BLK_MQ_F_TAG_SHARED; + } +} + +static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, bool shared) +{ + struct request_queue *q; list_for_each_entry(q, &set->tag_list, tag_set_list) { blk_mq_freeze_queue(q); - - queue_for_each_hw_ctx(q, hctx, i) { - if (shared) - hctx->flags |= BLK_MQ_F_TAG_SHARED; - else - hctx->flags &= ~BLK_MQ_F_TAG_SHARED; - } + queue_set_hctx_shared(q, shared); blk_mq_unfreeze_queue(q); } } @@ -1891,7 +1890,13 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q) mutex_lock(&set->tag_list_lock); list_del_init(&q->tag_set_list); - blk_mq_update_tag_set_depth(set); + + if (set->tag_list.next == set->tag_list.prev) { + /* just transitioned to unshared */ + set->flags &= ~BLK_MQ_F_TAG_SHARED; + /* update existing queue */ + blk_mq_update_tag_set_depth(set, false); + } mutex_unlock(&set->tag_list_lock); } @@ -1902,7 +1907,24 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set, mutex_lock(&set->tag_list_lock); list_add_tail(&q->tag_set_list, &set->tag_list); - blk_mq_update_tag_set_depth(set); + + if (set->tag_list.next != set->tag_list.prev) { + /* + * Only update the tag set state if the state has + * actually changed. + */ + if (!(set->flags & BLK_MQ_F_TAG_SHARED)) { + /* just transitioned to shared tags */ + set->flags |= BLK_MQ_F_TAG_SHARED; + blk_mq_update_tag_set_depth(set, true); + } else { + /* ensure we didn't race with another addition */ + struct blk_mq_hw_ctx *hctx = queue_first_hw_ctx(q); + if ((hctx->flags & BLK_MQ_F_TAG_SHARED) != + BLK_MQ_F_TAG_SHARED) + queue_set_hctx_shared(q, true); + } + } mutex_unlock(&set->tag_list_lock); } diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 5e7d43a..12ffc40 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -254,6 +254,9 @@ static inline void *blk_mq_rq_to_pdu(struct request *rq) for ((i) = 0; (i) < (hctx)->nr_ctx && \ ({ ctx = (hctx)->ctxs[(i)]; 1; }); (i)++) +#define queue_first_hw_ctx(q) \ + (q)->queue_hw_ctx[0] + #define blk_ctx_sum(q, sum) \ ({ \ struct blk_mq_ctx *__x; \ -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/