If the underlying queue returns BLK_STS_RESOURCE, let dm-rq handle
the requeue instead of blk-mq. This improves I/O merging, because
dm-rq can now see and react to the underlying queue's out-of-resource
condition, instead of blk-mq quietly re-inserting the request into
the hctx dispatch list.
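
In other words, the dispatch status now propagates back to dm-rq,
which turns it into its own requeue mechanism. A simplified sketch of
the before/after flow (condensed from the diff below, not complete
code):

        /* before: blk-mq handled the failure itself, invisibly to dm-rq */
        ret = blk_mq_try_issue_directly(hctx, rq, &cookie, true);
        if (ret == BLK_STS_RESOURCE)
                blk_mq_request_direct_insert(hctx, rq);

        /*
         * after: dm-rq observes BLK_STS_RESOURCE and requeues the
         * original request itself, so later bios can still be merged
         * to it
         */
        ret = dm_dispatch_clone_request(clone, rq);
        if (ret == BLK_STS_RESOURCE)
                r = rq->q->mq_ops ? DM_MAPIO_REQUEUE : DM_MAPIO_DELAY_REQUEUE;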

Below are IOPS numbers for mpath on lpfc, measured with fio (libaio,
bs=4k, direct I/O, queue_depth=64, 8 jobs).
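
For reference, a fio invocation matching these parameters would look
roughly like the following; the mpath device path is a placeholder,
and --rw was varied across read/randread/write/randwrite per table
row:

        fio --name=mpath-test --filename=/dev/mapper/mpathX \
            --ioengine=libaio --direct=1 --bs=4k --iodepth=64 \
            --numjobs=8 --rw=read --group_reporting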

1) blk-mq none scheduler
-----------------------------------------------------
 IOPS(K)  |v4.14-rc2    |v4.14-rc2 with| v4.14-rc2 with
          |             |[1][2]        | [1] [2] [3]
-----------------------------------------------------
read      |       53.69 |       40.26  |       94.61
-----------------------------------------------------
randread  |       24.64 |       30.08  |       35.57
-----------------------------------------------------
write     |       39.55 |       41.51  |      216.84
-----------------------------------------------------
randwrite |       33.97 |       34.27  |       33.98
-----------------------------------------------------

2) blk-mq mq-deadline scheduler
-----------------------------------------------------
 IOPS(K)  |v4.14-rc2    |v4.14-rc2 with| v4.14-rc2 with
          |             |[1][2]        | [1] [2] [3]
-----------------------------------------------------
read      |       23.81 |       21.91 |       89.94
-----------------------------------------------------
randread  |       38.47 |       38.96 |       38.02
-----------------------------------------------------
write     |       39.52 |        40.2 |      225.75
-----------------------------------------------------
randwrite |        34.8 |       33.73 |       33.44
-----------------------------------------------------

[1] [PATCH V5 0/7] blk-mq-sched: improve sequential I/O performance(part 1)

        https://marc.info/?l=linux-block&m=150676854821077&w=2

[2] [PATCH V5 0/8] blk-mq: improve bio merge for none scheduler

        https://marc.info/?l=linux-block&m=150677085521416&w=2

[3] this patchset

Signed-off-by: Ming Lei <ming....@redhat.com>
---
 block/blk-mq.c     | 17 +----------------
 drivers/md/dm-rq.c | 14 ++++++++++++--
 2 files changed, 13 insertions(+), 18 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 9a3a561a63b5..58d2268f9733 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1467,17 +1467,6 @@ void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
        blk_mq_hctx_mark_pending(hctx, ctx);
 }
 
-static void blk_mq_request_direct_insert(struct blk_mq_hw_ctx *hctx,
-                                        struct request *rq)
-{
-       spin_lock(&hctx->lock);
-       list_add_tail(&rq->queuelist, &hctx->dispatch);
-       set_bit(BLK_MQ_S_DISPATCH_BUSY, &hctx->state);
-       spin_unlock(&hctx->lock);
-
-       blk_mq_run_hw_queue(hctx, false);
-}
-
 /*
  * Should only be used carefully, when the caller knows we want to
  * bypass a potential IO scheduler on the target device.
@@ -1487,12 +1476,8 @@ blk_status_t blk_mq_request_bypass_insert(struct request *rq)
        struct blk_mq_ctx *ctx = rq->mq_ctx;
        struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(rq->q, ctx->cpu);
        blk_qc_t cookie;
-       blk_status_t ret;
 
-       ret = blk_mq_try_issue_directly(hctx, rq, &cookie, true);
-       if (ret == BLK_STS_RESOURCE)
-               blk_mq_request_direct_insert(hctx, rq);
-       return ret;
+       return blk_mq_try_issue_directly(hctx, rq, &cookie, true);
 }
 
 void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 2ef524bddd38..feb49c4d6fa2 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -405,7 +405,7 @@ static void end_clone_request(struct request *clone, blk_status_t error)
        dm_complete_request(tio->orig, error);
 }
 
-static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
+static blk_status_t dm_dispatch_clone_request(struct request *clone, struct request *rq)
 {
        blk_status_t r;
 
@@ -417,6 +417,7 @@ static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
        if (r != BLK_STS_OK && r != BLK_STS_RESOURCE)
                /* must complete clone in terms of original request */
                dm_complete_request(rq, r);
+       return r;
 }
 
 static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
@@ -490,8 +491,10 @@ static int map_request(struct dm_rq_target_io *tio)
        struct request *rq = tio->orig;
        struct request *cache = tio->clone;
        struct request *clone = cache;
+       blk_status_t ret;
 
        r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
+ again:
        switch (r) {
        case DM_MAPIO_SUBMITTED:
                /* The target has taken the I/O to submit by itself later */
@@ -509,7 +512,14 @@ static int map_request(struct dm_rq_target_io *tio)
                /* The target has remapped the I/O so dispatch it */
                trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
                                     blk_rq_pos(rq));
-               dm_dispatch_clone_request(clone, rq);
+               ret = dm_dispatch_clone_request(clone, rq);
+               if (ret == BLK_STS_RESOURCE) {
+                       if (!rq->q->mq_ops)
+                               r = DM_MAPIO_DELAY_REQUEUE;
+                       else
+                               r = DM_MAPIO_REQUEUE;
+                       goto again;
+               }
                break;
        case DM_MAPIO_REQUEUE:
                /* The target wants to requeue the I/O */
-- 
2.9.5
