This patch supports running a single flush machinery for each blk-mq dispatch queue, so that:
- the existing init_request and exit_request callbacks can cover the
  flush request too, so the buggy copy-based way of initializing the
  flush request's pdu can be fixed

- flush performance is improved in the multi hw-queue case

In an fio sync write test over virtio-blk (4 hw queues, ioengine=sync,
iodepth=64, numjobs=4, bs=4K), throughput increases significantly in my
test environment:

	- throughput: +70% in case of virtio-blk over null_blk
	- throughput: +30% in case of virtio-blk over SSD image

The multi-virtqueue feature isn't merged into QEMU yet; patches for the
feature can be found in the tree below:

	git://kernel.ubuntu.com/ming/qemu.git   v2.1.0-mq.3

Simply passing 'num_queues=4 vectors=5' should be enough to enable the
multi-queue (quad queue) feature for QEMU virtio-blk.

Suggested-by: Christoph Hellwig <h...@lst.de>
Signed-off-by: Ming Lei <ming....@canonical.com>
---
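(Illustrative sketch below the '---' cut line, so it is not part of the
commit message: what the first bullet means on the driver side.  The
names struct my_pdu, my_init_request and my_exit_request are
hypothetical and not taken from any real driver; the callback
signatures are the ones this patch invokes through set->ops.  Since
each hctx now allocates its own flush request and runs
init_request/exit_request on it (at tag set->queue_depth + hctx_idx),
the flush request's pdu is set up by the same callback as any other
request instead of being copied from an existing request.)

#include <linux/blk-mq.h>
#include <linux/slab.h>

/* Hypothetical per-command pdu; a real driver defines its own. */
struct my_pdu {
	void	*buf;
};

/*
 * Runs once per request at tag-set setup time; with this patch it is
 * also called for each hctx's flush request, so the pdu needs no
 * special copy-based initialization for flushes.
 */
static int my_init_request(void *data, struct request *rq,
			   unsigned int hctx_idx, unsigned int request_idx,
			   unsigned int numa_node)
{
	struct my_pdu *pdu = blk_mq_rq_to_pdu(rq);

	pdu->buf = kzalloc_node(64, GFP_KERNEL, numa_node);
	return pdu->buf ? 0 : -ENOMEM;
}

/* Likewise called for the flush request when the hctx is torn down. */
static void my_exit_request(void *data, struct request *rq,
			    unsigned int hctx_idx, unsigned int request_idx)
{
	struct my_pdu *pdu = blk_mq_rq_to_pdu(rq);

	kfree(pdu->buf);
}

A driver would simply wire these up as .init_request/.exit_request in
its struct blk_mq_ops; no flush-specific pdu handling is needed.
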
 block/blk-flush.c      |   18 +++++++++---------
 block/blk-mq.c         |   24 ++++++++++++++++++++++++
 block/blk.h            |   15 ++++++++++++++-
 include/linux/blk-mq.h |    2 ++
 4 files changed, 49 insertions(+), 10 deletions(-)

diff --git a/block/blk-flush.c b/block/blk-flush.c
index f8cc690..3da32ca 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -482,23 +482,23 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
 }
 EXPORT_SYMBOL(blkdev_issue_flush);
 
-static struct blk_flush_queue *blk_alloc_flush_queue(
-		struct request_queue *q)
+struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
+		struct blk_mq_hw_ctx *hctx, int cmd_size)
 {
 	struct blk_flush_queue *fq;
 	int rq_sz = sizeof(struct request);
+	int node = hctx ? hctx->numa_node : NUMA_NO_NODE;
 
-	fq = kzalloc(sizeof(*fq), GFP_KERNEL);
+	fq = kzalloc_node(sizeof(*fq), GFP_KERNEL, node);
 	if (!fq)
 		goto fail;
 
-	if (q->mq_ops) {
+	if (hctx) {
 		spin_lock_init(&fq->mq_flush_lock);
-		rq_sz = round_up(rq_sz + q->tag_set->cmd_size,
-				cache_line_size());
+		rq_sz = round_up(rq_sz + cmd_size, cache_line_size());
 	}
 
-	fq->flush_rq = kzalloc(rq_sz, GFP_KERNEL);
+	fq->flush_rq = kzalloc_node(rq_sz, GFP_KERNEL, node);
 	if (!fq->flush_rq)
 		goto fail_rq;
 
@@ -514,7 +514,7 @@ static struct blk_flush_queue *blk_alloc_flush_queue(
 	return ERR_PTR(-ENOMEM);
 }
 
-static void blk_free_flush_queue(struct blk_flush_queue *fq)
+void blk_free_flush_queue(struct blk_flush_queue *fq)
 {
 	kfree(fq->flush_rq);
 	kfree(fq);
@@ -522,7 +522,7 @@ static void blk_free_flush_queue(struct blk_flush_queue *fq)
 
 int blk_init_flush(struct request_queue *q)
 {
-	q->fq = blk_alloc_flush_queue(q);
+	q->fq = blk_alloc_flush_queue(q, NULL, 0);
 	if (IS_ERR(q->fq))
 		return PTR_ERR(q->fq);
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index afb0dfe..5a0da6d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1531,12 +1531,20 @@ static void blk_mq_exit_hctx(struct request_queue *q,
 		struct blk_mq_tag_set *set,
 		struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 {
+	unsigned flush_start_tag = set->queue_depth;
+
 	blk_mq_tag_idle(hctx);
 
+	if (set->ops->exit_request)
+		set->ops->exit_request(set->driver_data,
+				       hctx->fq->flush_rq, hctx_idx,
+				       flush_start_tag + hctx_idx);
+
 	if (set->ops->exit_hctx)
 		set->ops->exit_hctx(hctx, hctx_idx);
 
 	blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
+	blk_free_flush_queue(hctx->fq);
 	kfree(hctx->ctxs);
 	blk_mq_free_bitmap(&hctx->ctx_map);
 }
@@ -1571,6 +1579,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
 		struct blk_mq_hw_ctx *hctx, unsigned hctx_idx)
 {
 	int node;
+	unsigned flush_start_tag = set->queue_depth;
 
 	node = hctx->numa_node;
 	if (node == NUMA_NO_NODE)
@@ -1609,8 +1618,23 @@ static int blk_mq_init_hctx(struct request_queue *q,
 	    set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
 		goto free_bitmap;
 
+	hctx->fq = blk_alloc_flush_queue(q, hctx, set->cmd_size);
+	if (IS_ERR(hctx->fq))
+		goto exit_hctx;
+
+	if (set->ops->init_request &&
+	    set->ops->init_request(set->driver_data,
+				   hctx->fq->flush_rq, hctx_idx,
+				   flush_start_tag + hctx_idx, node))
+		goto free_fq;
+
 	return 0;
 
+ free_fq:
+	kfree(hctx->fq);
+ exit_hctx:
+	if (set->ops->exit_hctx)
+		set->ops->exit_hctx(hctx, hctx_idx);
  free_bitmap:
 	blk_mq_free_bitmap(&hctx->ctx_map);
  free_ctxs:
diff --git a/block/blk.h b/block/blk.h
index 30f8033..9f39b0d 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -2,6 +2,8 @@
 #define BLK_INTERNAL_H
 
 #include <linux/idr.h>
+#include <linux/blk-mq.h>
+#include "blk-mq.h"
 
 /* Amount of time in which a process may batch requests */
 #define BLK_BATCH_TIME	(HZ/50UL)
@@ -31,7 +33,15 @@ extern struct ida blk_queue_ida;
 static inline struct blk_flush_queue *blk_get_flush_queue(
 	struct request_queue *q, struct blk_mq_ctx *ctx)
 {
-	return q->fq;
+	struct blk_mq_hw_ctx *hctx;
+
+	if (!q->mq_ops)
+		return q->fq;
+
+	WARN_ON(!ctx);
+	hctx = q->mq_ops->map_queue(q, ctx->cpu);
+
+	return hctx->fq;
 }
 
 static inline void __blk_get_queue(struct request_queue *q)
@@ -41,6 +51,9 @@ static inline void __blk_get_queue(struct request_queue *q)
 
 int blk_init_flush(struct request_queue *q);
 void blk_exit_flush(struct request_queue *q);
+struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
+		struct blk_mq_hw_ctx *hctx, int cmd_size);
+void blk_free_flush_queue(struct blk_flush_queue *q);
 
 int blk_init_rl(struct request_list *rl, struct request_queue *q,
 		gfp_t gfp_mask);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index a1e31f2..1f3c523 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -4,6 +4,7 @@
 #include <linux/blkdev.h>
 
 struct blk_mq_tags;
+struct blk_flush_queue;
 
 struct blk_mq_cpu_notifier {
 	struct list_head list;
@@ -34,6 +35,7 @@ struct blk_mq_hw_ctx {
 	struct request_queue	*queue;
 	unsigned int		queue_num;
 
+	struct blk_flush_queue	*fq;
 	void			*driver_data;
-- 
1.7.9.5