Even if we have no waiters on any of the sbitmap_queue wait states, we
still have to loop over every entry to check. We do this for every IO,
so the cost adds up.
Shift a bit of the cost to the slow path, when we actually have waiters.
Wrap prepare_to_wait_exclusive() and finish_wait(), so we can maintain
an internal count of how many are currently active. Then we can simply
check this count in sbq_wake_ptr() and not have to loop if we don't
have any sleepers.

Convert the two users of sbitmap with waiting, blk-mq-tag and iSCSI.

Signed-off-by: Jens Axboe <ax...@kernel.dk>
---
 block/blk-mq-tag.c                       |  7 +++----
 drivers/target/iscsi/iscsi_target_util.c |  8 +++++---
 include/linux/sbitmap.h                  | 19 +++++++++++++++++++
 lib/sbitmap.c                            | 21 +++++++++++++++++++++
 4 files changed, 48 insertions(+), 7 deletions(-)

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 87bc5df72d48..66c3a1c887ed 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -154,8 +154,7 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
                 if (tag != -1)
                         break;
 
-                prepare_to_wait_exclusive(&ws->wait, &wait,
-                                          TASK_UNINTERRUPTIBLE);
+                sbitmap_prepare_to_wait(bt, ws, &wait, TASK_UNINTERRUPTIBLE);
 
                 tag = __blk_mq_get_tag(data, bt);
                 if (tag != -1)
@@ -167,6 +166,8 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
                 bt_prev = bt;
                 io_schedule();
 
+                sbitmap_finish_wait(bt, ws, &wait);
+
                 data->ctx = blk_mq_get_ctx(data->q);
                 data->hctx = blk_mq_map_queue(data->q, data->cmd_flags,
                                               data->ctx->cpu);
@@ -176,8 +177,6 @@ unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data)
                 else
                         bt = &tags->bitmap_tags;
 
-                finish_wait(&ws->wait, &wait);
-
                 /*
                  * If destination hw queue is changed, fake wake up on
                  * previous queue for compensating the wake up miss, so
diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c
index 36b742932c72..d7d03d601732 100644
--- a/drivers/target/iscsi/iscsi_target_util.c
+++ b/drivers/target/iscsi/iscsi_target_util.c
@@ -152,13 +152,15 @@ static int iscsit_wait_for_tag(struct se_session *se_sess, int state, int *cpup)
         int tag = -1;
         DEFINE_WAIT(wait);
         struct sbq_wait_state *ws;
+        struct sbitmap_queue *sbq;
 
         if (state == TASK_RUNNING)
                 return tag;
 
-        ws = &se_sess->sess_tag_pool.ws[0];
+        sbq = &se_sess->sess_tag_pool;
+        ws = &sbq->ws[0];
         for (;;) {
-                prepare_to_wait_exclusive(&ws->wait, &wait, state);
+                sbitmap_prepare_to_wait(sbq, ws, &wait, state);
                 if (signal_pending_state(state, current))
                         break;
                 tag = sbitmap_queue_get(&se_sess->sess_tag_pool, cpup);
@@ -167,7 +169,7 @@ static int iscsit_wait_for_tag(struct se_session *se_sess, int state, int *cpup)
                 schedule();
         }
 
-        finish_wait(&ws->wait, &wait);
+        sbitmap_finish_wait(sbq, ws, &wait);
         return tag;
 }
 
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index 13eb8973bd10..dbfbac0c4daa 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -135,6 +135,11 @@ struct sbitmap_queue {
          */
         struct sbq_wait_state *ws;
 
+        /*
+         * @ws_active: count of currently active ws waitqueues
+         */
+        atomic_t ws_active;
+
         /**
          * @round_robin: Allocate bits in strict round-robin order.
          */
@@ -554,4 +559,18 @@ void sbitmap_queue_wake_up(struct sbitmap_queue *sbq);
  */
 void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m);
 
+/*
+ * Wrapper around prepare_to_wait_exclusive(), which maintains some extra
+ * internal state.
+ */
+void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq,
+                             struct sbq_wait_state *ws,
+                             struct wait_queue_entry *wait, int state);
+
+/*
+ * Must be paired with sbitmap_prepare_to_wait().
+ */
+void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws,
+                         struct wait_queue_entry *wait);
+
 #endif /* __LINUX_SCALE_BITMAP_H */
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 04db31f4dfda..1cc21f916276 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -384,6 +384,7 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
         sbq->min_shallow_depth = UINT_MAX;
         sbq->wake_batch = sbq_calc_wake_batch(sbq, depth);
         atomic_set(&sbq->wake_index, 0);
+        atomic_set(&sbq->ws_active, 0);
 
         sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node);
         if (!sbq->ws) {
@@ -499,6 +500,9 @@ static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
 {
         int i, wake_index;
 
+        if (!atomic_read(&sbq->ws_active))
+                return NULL;
+
         wake_index = atomic_read(&sbq->wake_index);
         for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
                 struct sbq_wait_state *ws = &sbq->ws[wake_index];
@@ -639,3 +643,20 @@ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m)
         seq_printf(m, "min_shallow_depth=%u\n", sbq->min_shallow_depth);
 }
 EXPORT_SYMBOL_GPL(sbitmap_queue_show);
+
+void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq,
+                             struct sbq_wait_state *ws,
+                             struct wait_queue_entry *wait, int state)
+{
+        atomic_inc(&sbq->ws_active);
+        prepare_to_wait_exclusive(&ws->wait, wait, state);
+}
+EXPORT_SYMBOL_GPL(sbitmap_prepare_to_wait);
+
+void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws,
+                         struct wait_queue_entry *wait)
+{
+        finish_wait(&ws->wait, wait);
+        atomic_dec(&sbq->ws_active);
+}
+EXPORT_SYMBOL_GPL(sbitmap_finish_wait);
-- 
2.17.1
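
For reference, a minimal sketch (not part of the patch) of the caller-side
pattern the new helpers assume, modeled on the blk-mq-tag and iSCSI
conversions above. The function name example_get_tag() and its arguments are
hypothetical; the point is that every sbitmap_prepare_to_wait() is balanced
by an sbitmap_finish_wait(), so ws_active tracks the number of sleepers and
sbq_wake_ptr() can bail out early when it reads zero:

#include <linux/sbitmap.h>
#include <linux/sched.h>
#include <linux/wait.h>

/* Hypothetical consumer: sleep until a tag can be allocated from sbq. */
static int example_get_tag(struct sbitmap_queue *sbq, unsigned int *cpu)
{
        struct sbq_wait_state *ws = &sbq->ws[0];
        DEFINE_WAIT(wait);
        int tag;

        do {
                /* bumps sbq->ws_active before queueing us as a waiter */
                sbitmap_prepare_to_wait(sbq, ws, &wait, TASK_UNINTERRUPTIBLE);
                tag = sbitmap_queue_get(sbq, cpu);
                if (tag < 0)
                        schedule();
                /* pairs with the prepare above and drops ws_active again */
                sbitmap_finish_wait(sbq, ws, &wait);
        } while (tag < 0);

        return tag;
}

Picking ws[0] unconditionally mirrors the iSCSI caller; blk-mq instead
spreads waiters across the SBQ_WAIT_QUEUES entries via bt_wait_ptr().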