The tag-waiting loop in the bt_get() function is a mystery to me:

        do {
                bool was_empty;

1.              was_empty = list_empty(&wait.task_list);
                prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE);

                tag = __bt_get(hctx, bt, last_tag);
                if (tag != -1)
                        break;

2.              if (was_empty)
3.                      atomic_set(&bs->wait_cnt, bt->wake_cnt);

                io_schedule();
        } while (1);

[1] The list_empty(&wait.task_list) check is not protected;
[2] The was_empty check is always true, which results in *every* thread
    entering the loop resetting the bt_wait_state::wait_cnt counter,
    rather than only every bt->wake_cnt'th thread;
[3] The 'bt_wait_state::wait_cnt' counter update seems redundant anyway,
    since it also gets reset in the bt_clear_tag() function.

Replace the open-coded loop with ___wait_event() and initialize
bt_wait_state::wait_cnt once in bt_alloc().

Cc: Ming Lei <tom.leim...@gmail.com>
Cc: Jens Axboe <ax...@kernel.dk>
Signed-off-by: Alexander Gordeev <agord...@redhat.com>
---
 block/blk-mq-tag.c |   27 +++++++--------------------
 1 files changed, 7 insertions(+), 20 deletions(-)

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 5579fae..dc1f684 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -224,7 +224,6 @@ static int bt_get(struct blk_mq_bitmap_tags *bt, struct blk_mq_hw_ctx *hctx,
                  unsigned int *last_tag, gfp_t gfp)
 {
        struct bt_wait_state *bs;
-       DEFINE_WAIT(wait);
        int tag;
 
        tag = __bt_get(hctx, bt, last_tag);
@@ -235,23 +234,9 @@ static int bt_get(struct blk_mq_bitmap_tags *bt, struct blk_mq_hw_ctx *hctx,
                return -1;
 
        bs = bt_wait_ptr(bt, hctx);
-       do {
-               bool was_empty;
-
-               was_empty = list_empty(&wait.task_list);
-               prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE);
-
-               tag = __bt_get(hctx, bt, last_tag);
-               if (tag != -1)
-                       break;
+       ___wait_event(bs->wait, (tag = __bt_get(hctx, bt, last_tag)) != -1,
+                     TASK_UNINTERRUPTIBLE, 0, 0, io_schedule());
 
-               if (was_empty)
-                       atomic_set(&bs->wait_cnt, bt->wake_cnt);
-
-               io_schedule();
-       } while (1);
-
-       finish_wait(&bs->wait, &wait);
        return tag;
 }
 
@@ -477,13 +462,15 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
                return -ENOMEM;
        }
 
-       for (i = 0; i < BT_WAIT_QUEUES; i++)
-               init_waitqueue_head(&bt->bs[i].wait);
-
        bt->wake_cnt = BT_WAIT_BATCH;
        if (bt->wake_cnt > depth / 4)
                bt->wake_cnt = max(1U, depth / 4);
 
+       for (i = 0; i < BT_WAIT_QUEUES; i++) {
+               init_waitqueue_head(&bt->bs[i].wait);
+               atomic_set(&bt->bs[i].wait_cnt, bt->wake_cnt);
+       }
+
        bt->depth = depth;
        return 0;
 }
-- 
1.7.7.6

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to