Add blk-mq support to the pmem driver in addition to the existing
direct bio path. This lays the groundwork for hardware offload via DMA
engines. The bio path remains the default; blk-mq support can be
enabled via the module parameter queue_mode=1.
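For example, assuming the driver is built as the nd_pmem module, the
blk-mq path is selected with "modprobe nd_pmem queue_mode=1".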

Signed-off-by: Dave Jiang <dave.ji...@intel.com>
---
 drivers/nvdimm/pmem.c |  137 +++++++++++++++++++++++++++++++++++++++++--------
 drivers/nvdimm/pmem.h |    3 +
 2 files changed, 119 insertions(+), 21 deletions(-)

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index c544d46..98e752f 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -31,10 +31,24 @@
 #include <linux/pmem.h>
 #include <linux/dax.h>
 #include <linux/nd.h>
+#include <linux/blk-mq.h>
 #include "pmem.h"
 #include "pfn.h"
 #include "nd.h"
 
+enum {
+       PMEM_Q_BIO = 0,
+       PMEM_Q_MQ = 1,
+};
+
+static int queue_mode = PMEM_Q_BIO;
+module_param(queue_mode, int, 0444);
+MODULE_PARM_DESC(queue_mode, "Pmem Queue Mode (0=BIO, 1=BLK-MQ)");
+
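+/* per-request driver data, embedded in each blk-mq request PDU */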
+struct pmem_cmd {
+       struct request *rq;
+};
+
 static struct device *to_dev(struct pmem_device *pmem)
 {
        /*
@@ -239,9 +253,13 @@ static const struct dax_operations pmem_dax_ops = {
        .direct_access = pmem_dax_direct_access,
 };
 
-static void pmem_release_queue(void *q)
+static void pmem_release_queue(void *data)
 {
-       blk_cleanup_queue(q);
+       struct pmem_device *pmem = data;
+
+       blk_cleanup_queue(pmem->q);
+       if (queue_mode == PMEM_Q_MQ)
+               blk_mq_free_tag_set(&pmem->tag_set);
 }
 
 static void pmem_freeze_queue(void *q)
@@ -259,6 +277,54 @@ static void pmem_release_disk(void *__pmem)
        put_disk(pmem->disk);
 }
 
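+/*
+ * Carry out a whole request synchronously in the submitting context,
+ * flushing the region before the data transfer for a preflush and
+ * after it for FUA.
+ */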
+static int pmem_handle_cmd(struct pmem_cmd *cmd)
+{
+       struct request *req = cmd->rq;
+       struct request_queue *q = req->q;
+       struct pmem_device *pmem = q->queuedata;
+       struct nd_region *nd_region = to_region(pmem);
+       struct bio_vec bvec;
+       struct req_iterator iter;
+       int rc = 0;
+
+       if (req->cmd_flags & REQ_PREFLUSH)
+               nvdimm_flush(nd_region);
+
+       rq_for_each_segment(bvec, req, iter) {
+               rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len,
+                               bvec.bv_offset, op_is_write(req_op(req)),
+                               iter.iter.bi_sector);
+               if (rc < 0)
+                       break;
+       }
+
+       if (req->cmd_flags & REQ_FUA)
+               nvdimm_flush(nd_region);
+
+       blk_mq_end_request(cmd->rq, rc);
+
+       return rc;
+}
+
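+/*
+ * blk-mq ->queue_rq entry point: dispatch the request immediately in
+ * the caller's context, since pmem is synchronous memory-backed
+ * storage.
+ */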
+static int pmem_queue_rq(struct blk_mq_hw_ctx *hctx,
+               const struct blk_mq_queue_data *bd)
+{
+       struct pmem_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
+
+       cmd->rq = bd->rq;
+
+       blk_mq_start_request(bd->rq);
+
+       /*
+        * pmem_handle_cmd() completes the request itself, including on
+        * error, so returning BLK_MQ_RQ_QUEUE_ERROR here would cause the
+        * block layer to complete the request a second time.
+        */
+       pmem_handle_cmd(cmd);
+
+       return BLK_MQ_RQ_QUEUE_OK;
+}
+
+static const struct blk_mq_ops pmem_mq_ops = {
+       .queue_rq       = pmem_queue_rq,
+};
+
 static int pmem_attach_disk(struct device *dev,
                struct nd_namespace_common *ndns)
 {
@@ -272,9 +338,9 @@ static int pmem_attach_disk(struct device *dev,
        struct nd_pfn_sb *pfn_sb;
        struct pmem_device *pmem;
        struct resource pfn_res;
-       struct request_queue *q;
        struct gendisk *disk;
        void *addr;
+       int rc;
 
        /* while nsio_rw_bytes is active, parse a pfn info block if present */
        if (is_nd_pfn(dev)) {
@@ -303,17 +369,47 @@ static int pmem_attach_disk(struct device *dev,
                return -EBUSY;
        }
 
-       q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev));
-       if (!q)
-               return -ENOMEM;
+       if (queue_mode == PMEM_Q_MQ) {
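+               /*
+                * One hardware queue per online NUMA node with a modest
+                * fixed depth; each request embeds a struct pmem_cmd.
+                */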
+               pmem->tag_set.ops = &pmem_mq_ops;
+               pmem->tag_set.nr_hw_queues = nr_online_nodes;
+               pmem->tag_set.queue_depth = 64;
+               pmem->tag_set.numa_node = dev_to_node(dev);
+               pmem->tag_set.cmd_size = sizeof(struct pmem_cmd);
+               pmem->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
+               pmem->tag_set.driver_data = pmem;
+
+               rc = blk_mq_alloc_tag_set(&pmem->tag_set);
+               if (rc < 0)
+                       return rc;
+
+               pmem->q = blk_mq_init_queue(&pmem->tag_set);
+               if (IS_ERR(pmem->q)) {
+                       blk_mq_free_tag_set(&pmem->tag_set);
+                       return PTR_ERR(pmem->q);
+               }
 
-       if (devm_add_action_or_reset(dev, pmem_release_queue, q))
-               return -ENOMEM;
+               /*
+                * On failure, devm_add_action_or_reset() invokes
+                * pmem_release_queue() itself, which already frees the
+                * tag set, so don't free it again here.
+                */
+               if (devm_add_action_or_reset(dev, pmem_release_queue, pmem))
+                       return -ENOMEM;
+       } else if (queue_mode == PMEM_Q_BIO) {
+               pmem->q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev));
+               if (!pmem->q)
+                       return -ENOMEM;
+
+               if (devm_add_action_or_reset(dev, pmem_release_queue, pmem))
+                       return -ENOMEM;
+
+               blk_queue_make_request(pmem->q, pmem_make_request);
+       } else {
+               dev_warn(dev, "Invalid queue mode: %d\n", queue_mode);
+               return -EINVAL;
+       }
 
        pmem->pfn_flags = PFN_DEV;
        if (is_nd_pfn(dev)) {
-               addr = devm_memremap_pages(dev, &pfn_res, &q->q_usage_counter,
-                               altmap);
+               addr = devm_memremap_pages(dev, &pfn_res,
+                               &pmem->q->q_usage_counter, altmap);
                pfn_sb = nd_pfn->pfn_sb;
                pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
                pmem->pfn_pad = resource_size(res) - resource_size(&pfn_res);
@@ -322,7 +418,7 @@ static int pmem_attach_disk(struct device *dev,
                res->start += pmem->data_offset;
        } else if (pmem_should_map_pages(dev)) {
                addr = devm_memremap_pages(dev, &nsio->res,
-                               &q->q_usage_counter, NULL);
+                               &pmem->q->q_usage_counter, NULL);
                pmem->pfn_flags |= PFN_MAP;
        } else
                addr = devm_memremap(dev, pmem->phys_addr,
@@ -332,21 +428,20 @@ static int pmem_attach_disk(struct device *dev,
         * At release time the queue must be frozen before
         * devm_memremap_pages is unwound
         */
-       if (devm_add_action_or_reset(dev, pmem_freeze_queue, q))
+       if (devm_add_action_or_reset(dev, pmem_freeze_queue, pmem->q))
                return -ENOMEM;
 
        if (IS_ERR(addr))
                return PTR_ERR(addr);
        pmem->virt_addr = addr;
 
-       blk_queue_write_cache(q, true, true);
-       blk_queue_make_request(q, pmem_make_request);
-       blk_queue_physical_block_size(q, PAGE_SIZE);
-       blk_queue_max_hw_sectors(q, UINT_MAX);
-       blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
-       queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q);
-       queue_flag_set_unlocked(QUEUE_FLAG_DAX, q);
-       q->queuedata = pmem;
+       blk_queue_write_cache(pmem->q, true, true);
+       blk_queue_physical_block_size(pmem->q, PAGE_SIZE);
+       blk_queue_max_hw_sectors(pmem->q, UINT_MAX);
+       blk_queue_bounce_limit(pmem->q, BLK_BOUNCE_ANY);
+       queue_flag_set_unlocked(QUEUE_FLAG_NONROT, pmem->q);
+       queue_flag_set_unlocked(QUEUE_FLAG_DAX, pmem->q);
+       pmem->q->queuedata = pmem;
 
        disk = alloc_disk_node(0, nid);
        if (!disk)
@@ -354,7 +449,7 @@ static int pmem_attach_disk(struct device *dev,
        pmem->disk = disk;
 
        disk->fops              = &pmem_fops;
-       disk->queue             = q;
+       disk->queue             = pmem->q;
        disk->flags             = GENHD_FL_EXT_DEVT;
        nvdimm_namespace_disk_name(ndns, disk->disk_name);
        set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset)
diff --git a/drivers/nvdimm/pmem.h b/drivers/nvdimm/pmem.h
index 7f4dbd7..1836c7b 100644
--- a/drivers/nvdimm/pmem.h
+++ b/drivers/nvdimm/pmem.h
@@ -4,6 +4,7 @@
 #include <linux/types.h>
 #include <linux/pfn_t.h>
 #include <linux/fs.h>
+#include <linux/blk-mq.h>
 
 /* this definition is in its own header for tools/testing/nvdimm to consume */
 struct pmem_device {
@@ -20,6 +21,8 @@ struct pmem_device {
        struct badblocks        bb;
        struct dax_device       *dax_dev;
        struct gendisk          *disk;
+       struct blk_mq_tag_set   tag_set;
+       struct request_queue    *q;
 };
 
 long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
