This one is redundant and should be dropped.  I ran git-format-patch
twice after a quick rebase to tweak the subject and header.

Sorry for the confusion.

On Tue, Jan 16 2018 at 11:33pm -0500,
Mike Snitzer <snit...@redhat.com> wrote:

> From: Ming Lei <ming....@redhat.com>
> 
> blk_insert_cloned_request() is called in the fast path of a dm-rq driver
> (e.g. blk-mq request-based DM mpath).  blk_insert_cloned_request() uses
> blk_mq_request_bypass_insert() to directly append the request to the
> blk-mq hctx->dispatch_list of the underlying queue.
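> 
> For reference, blk_mq_request_bypass_insert() does roughly the
> following today (a simplified sketch of the existing helper, not part
> of this patch):
> 
>     spin_lock(&hctx->lock);
>     list_add_tail(&rq->queuelist, &hctx->dispatch);
>     spin_unlock(&hctx->lock);
> 
>     if (run_queue)
>             blk_mq_run_hw_queue(hctx, false);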
> 
> 1) This way isn't efficient because the hctx spinlock is always taken.
> 
> 2) With blk_insert_cloned_request(), we completely bypass underlying
> queue's elevator and depend on the upper-level dm-rq driver's elevator
> to schedule IO.  But dm-rq currently can't get the underlying queue's
> dispatch feedback at all.  Without knowing whether a request was issued
> or not (e.g. due to the underlying queue being busy), the dm-rq elevator will
> not be able to provide effective IO merging (as a side-effect of dm-rq
> currently blindly destaging a request from its elevator only to requeue
> it after a delay, which kills any opportunity for merging).  This
> obviously causes very bad sequential IO performance.
> 
> Fix this by updating blk_insert_cloned_request() to use
> blk_mq_request_direct_issue().  blk_mq_request_direct_issue() allows a
> request to be issued directly to the underlying queue and returns the
> dispatch feedback (blk_status_t).  If blk_mq_request_direct_issue()
> returns BLK_STS_RESOURCE the dm-rq driver will now use DM_MAPIO_REQUEUE
> to _not_ destage the request, thereby preserving the opportunity to
> merge IO.
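> 
> For illustration, the caller-side contract now looks roughly like this
> (a simplified sketch of the dm-rq side; the actual handling is in the
> map_request() hunk below):
> 
>     blk_status_t r = blk_insert_cloned_request(clone->q, clone);
> 
>     if (r == BLK_STS_RESOURCE) {
>             /* underlying queue busy: requeue, don't destage */
>             return DM_MAPIO_REQUEUE;
>     }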
> 
> With this, request-based DM's blk-mq sequential IO performance is vastly
> improved (as much as 3X in mpath/virtio-scsi testing).
> 
> Signed-off-by: Ming Lei <ming....@redhat.com>
> [based _heavily_ on Ming Lei's initial solution, but blk-mq.c changes
> were refactored to make them less fragile and easier to read/review]
> Signed-off-by: Mike Snitzer <snit...@redhat.com>
> ---
>  block/blk-core.c   |  3 +--
>  block/blk-mq.c     | 42 +++++++++++++++++++++++++++++++++---------
>  block/blk-mq.h     |  3 +++
>  drivers/md/dm-rq.c | 19 ++++++++++++++++---
>  4 files changed, 53 insertions(+), 14 deletions(-)
> 
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 7ba607527487..55f338020254 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -2500,8 +2500,7 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *
>                * bypass a potential scheduler on the bottom device for
>                * insert.
>                */
> -             blk_mq_request_bypass_insert(rq, true);
> -             return BLK_STS_OK;
> +             return blk_mq_request_direct_issue(rq);
>       }
>  
>       spin_lock_irqsave(q->queue_lock, flags);
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index f30e34a22a6c..81ee3f9124dc 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -1706,7 +1706,8 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
>       blk_qc_t new_cookie;
>       blk_status_t ret;
>  
> -     new_cookie = request_to_qc_t(hctx, rq);
> +     if (cookie)
> +             new_cookie = request_to_qc_t(hctx, rq);
>  
>       /*
>        * For OK queue, we are done. For error, caller may kill it.
> @@ -1716,13 +1717,15 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
>       ret = q->mq_ops->queue_rq(hctx, &bd);
>       switch (ret) {
>       case BLK_STS_OK:
> -             *cookie = new_cookie;
> +             if (cookie)
> +                     *cookie = new_cookie;
>               break;
>       case BLK_STS_RESOURCE:
>               __blk_mq_requeue_request(rq);
>               break;
>       default:
> -             *cookie = BLK_QC_T_NONE;
> +             if (cookie)
> +                     *cookie = BLK_QC_T_NONE;
>               break;
>       }
>  
> @@ -1731,15 +1734,20 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
>  
>  static void __blk_mq_fallback_to_insert(struct blk_mq_hw_ctx *hctx,
>                                       struct request *rq,
> -                                     bool run_queue)
> +                                     bool run_queue, bool bypass_insert)
>  {
> +     if (bypass_insert) {
> +             blk_mq_request_bypass_insert(rq, run_queue);
> +             return;
> +     }
>       blk_mq_sched_insert_request(rq, false, run_queue, false,
>                                       hctx->flags & BLK_MQ_F_BLOCKING);
>  }
>  
>  static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
>                                               struct request *rq,
> -                                             blk_qc_t *cookie)
> +                                             blk_qc_t *cookie,
> +                                             bool bypass_insert)
>  {
>       struct request_queue *q = rq->q;
>       bool run_queue = true;
> @@ -1750,7 +1758,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
>               goto insert;
>       }
>  
> -     if (q->elevator)
> +     if (q->elevator && !bypass_insert)
>               goto insert;
>  
>       if (!blk_mq_get_driver_tag(rq, NULL, false))
> @@ -1763,7 +1771,9 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
>  
>       return __blk_mq_issue_directly(hctx, rq, cookie);
>  insert:
> -     __blk_mq_fallback_to_insert(hctx, rq, run_queue);
> +     __blk_mq_fallback_to_insert(hctx, rq, run_queue, bypass_insert);
> +     if (bypass_insert)
> +             return BLK_STS_RESOURCE;
>  
>       return BLK_STS_OK;
>  }
> @@ -1778,15 +1788,29 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
>  
>       hctx_lock(hctx, &srcu_idx);
>  
> -     ret = __blk_mq_try_issue_directly(hctx, rq, cookie);
> +     ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false);
>       if (ret == BLK_STS_RESOURCE)
> -             __blk_mq_fallback_to_insert(hctx, rq, true);
> +             __blk_mq_fallback_to_insert(hctx, rq, true, false);
>       else if (ret != BLK_STS_OK)
>               blk_mq_end_request(rq, ret);
>  
>       hctx_unlock(hctx, srcu_idx);
>  }
>  
> +blk_status_t blk_mq_request_direct_issue(struct request *rq)
> +{
> +     blk_status_t ret;
> +     int srcu_idx;
> +     struct blk_mq_ctx *ctx = rq->mq_ctx;
> +     struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(rq->q, ctx->cpu);
> +
> +     hctx_lock(hctx, &srcu_idx);
> +     ret = __blk_mq_try_issue_directly(hctx, rq, NULL, true);
> +     hctx_unlock(hctx, srcu_idx);
> +
> +     return ret;
> +}
> +
>  static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
>  {
>       const int is_sync = op_is_sync(bio->bi_opf);
> diff --git a/block/blk-mq.h b/block/blk-mq.h
> index 8591a54d989b..e3ebc93646ca 100644
> --- a/block/blk-mq.h
> +++ b/block/blk-mq.h
> @@ -74,6 +74,9 @@ void blk_mq_request_bypass_insert(struct request *rq, bool run_queue);
>  void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
>                               struct list_head *list);
>  
> +/* Used by blk_insert_cloned_request() to issue request directly */
> +blk_status_t blk_mq_request_direct_issue(struct request *rq);
> +
>  /*
>   * CPU -> queue mappings
>   */
> diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
> index c28357f5cb0e..b7d175e94a02 100644
> --- a/drivers/md/dm-rq.c
> +++ b/drivers/md/dm-rq.c
> @@ -395,7 +395,7 @@ static void end_clone_request(struct request *clone, blk_status_t error)
>       dm_complete_request(tio->orig, error);
>  }
>  
> -static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
> +static blk_status_t dm_dispatch_clone_request(struct request *clone, struct request *rq)
>  {
>       blk_status_t r;
>  
> @@ -404,9 +404,10 @@ static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
>  
>       clone->start_time = jiffies;
>       r = blk_insert_cloned_request(clone->q, clone);
> -     if (r)
> +     if (r != BLK_STS_OK && r != BLK_STS_RESOURCE)
>               /* must complete clone in terms of original request */
>               dm_complete_request(rq, r);
> +     return r;
>  }
>  
>  static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
> @@ -476,8 +477,10 @@ static int map_request(struct dm_rq_target_io *tio)
>       struct mapped_device *md = tio->md;
>       struct request *rq = tio->orig;
>       struct request *clone = NULL;
> +     blk_status_t ret;
>  
>       r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
> +check_again:
>       switch (r) {
>       case DM_MAPIO_SUBMITTED:
>               /* The target has taken the I/O to submit by itself later */
> @@ -492,7 +495,17 @@ static int map_request(struct dm_rq_target_io *tio)
>               /* The target has remapped the I/O so dispatch it */
>               trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
>                                    blk_rq_pos(rq));
> -             dm_dispatch_clone_request(clone, rq);
> +             ret = dm_dispatch_clone_request(clone, rq);
> +             if (ret == BLK_STS_RESOURCE) {
> +                     blk_rq_unprep_clone(clone);
> +                     tio->ti->type->release_clone_rq(clone);
> +                     tio->clone = NULL;
> +                     if (!rq->q->mq_ops)
> +                             r = DM_MAPIO_DELAY_REQUEUE;
> +                     else
> +                             r = DM_MAPIO_REQUEUE;
> +                     goto check_again;
> +             }
>               break;
>       case DM_MAPIO_REQUEUE:
>               /* The target wants to requeue the I/O */
> -- 
> 2.15.0
> 