[PATCH v3 11/11] nvmet: Optionally use PCI P2P memory

2018-03-12 Thread Logan Gunthorpe
We create a configfs attribute in each nvme-fabrics target port to
enable p2p memory use. When enabled, the port will only use the
p2p memory if a p2p memory device can be found which is behind the
same switch as the RDMA port and all the block devices in use. If
the user enabled it and no devices are found, then the system will
silently fall back on using regular memory.

If appropriate, that port will allocate memory for the RDMA buffers
for queues from the p2pmem device, falling back to system memory should
anything fail.

Ideally, we'd want to use an NVME CMB buffer as p2p memory. This would
save an extra PCI transfer as the NVME card could just take the data
out of its own memory. However, at this time, cards with CMB buffers
don't seem to be available.

Signed-off-by: Stephen Bates 
Signed-off-by: Steve Wise 
[hch: partial rewrite of the initial code]
Signed-off-by: Christoph Hellwig 
Signed-off-by: Logan Gunthorpe 
---
 drivers/nvme/target/configfs.c |  67 ++
 drivers/nvme/target/core.c | 106 -
 drivers/nvme/target/io-cmd.c   |   3 ++
 drivers/nvme/target/nvmet.h|  12 +
 drivers/nvme/target/rdma.c |  32 +++--
 5 files changed, 214 insertions(+), 6 deletions(-)

diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index e6b2d2af81b6..6ca8c712f0d3 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -17,6 +17,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include "nvmet.h"
 
@@ -867,12 +869,77 @@ static void nvmet_port_release(struct config_item *item)
kfree(port);
 }
 
+#ifdef CONFIG_PCI_P2PDMA
+static ssize_t nvmet_p2pmem_show(struct config_item *item, char *page)
+{
+   struct nvmet_port *port = to_nvmet_port(item);
+
+   if (!port->use_p2pmem)
+   return sprintf(page, "none\n");
+
+   if (!port->p2p_dev)
+   return sprintf(page, "auto\n");
+
+   return sprintf(page, "%s\n", pci_name(port->p2p_dev));
+}
+
+static ssize_t nvmet_p2pmem_store(struct config_item *item,
+ const char *page, size_t count)
+{
+   struct nvmet_port *port = to_nvmet_port(item);
+   struct device *dev;
+   struct pci_dev *p2p_dev = NULL;
+   bool use_p2pmem;
+
+   switch (page[0]) {
+   case 'y':
+   case 'Y':
+   case 'a':
+   case 'A':
+   use_p2pmem = true;
+   break;
+   case 'n':
+   case 'N':
+   use_p2pmem = false;
+   break;
+   default:
+   dev = bus_find_device_by_name(&pci_bus_type, NULL, page);
+   if (!dev) {
+   pr_err("No such PCI device: %s\n", page);
+   return -ENODEV;
+   }
+
+   use_p2pmem = true;
+   p2p_dev = to_pci_dev(dev);
+
+   if (!pci_has_p2pmem(p2p_dev)) {
+   pr_err("PCI device has no peer-to-peer memory: %s\n",
+  page);
+   pci_dev_put(p2p_dev);
+   return -ENODEV;
+   }
+   }
+
+   down_write(&nvmet_config_sem);
+   port->use_p2pmem = use_p2pmem;
+   pci_dev_put(port->p2p_dev);
+   port->p2p_dev = p2p_dev;
+   up_write(&nvmet_config_sem);
+
+   return count;
+}
+CONFIGFS_ATTR(nvmet_, p2pmem);
+#endif /* CONFIG_PCI_P2PDMA */
+
 static struct configfs_attribute *nvmet_port_attrs[] = {
&nvmet_attr_addr_adrfam,
&nvmet_attr_addr_treq,
&nvmet_attr_addr_traddr,
&nvmet_attr_addr_trsvcid,
&nvmet_attr_addr_trtype,
+#ifdef CONFIG_PCI_P2PDMA
+   &nvmet_attr_p2pmem,
+#endif
NULL,
 };
 
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index a78029e4e5f4..ab3cc7135ae8 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "nvmet.h"
 
@@ -271,6 +272,25 @@ void nvmet_put_namespace(struct nvmet_ns *ns)
percpu_ref_put(&ns->ref);
 }
 
+static int nvmet_p2pdma_add_client(struct nvmet_ctrl *ctrl,
+  struct nvmet_ns *ns)
+{
+   int ret;
+
+   if (!blk_queue_pci_p2pdma(ns->bdev->bd_queue)) {
+   pr_err("peer-to-peer DMA is not supported by %s\n",
+  ns->device_path);
+   return -EINVAL;
+   }
+
+   ret = pci_p2pdma_add_client(&ctrl->p2p_clients, nvmet_ns_dev(ns));
+   if (ret)
+   pr_err("failed to add peer-to-peer DMA client %s: %d\n",
+  ns->device_path, ret);
+
+   return ret;
+}
+
 int nvmet_ns_enable(struct nvmet_ns *ns)
 {
struct nvmet_subsys *subsys = ns->subsys;
@@ -299,6 +319,14 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
if (ret)
goto out_blkdev_put;
 
+   list_for_each_entry(ctrl, &subsys->ctrls, subsys_

Re: [PATCH v3 11/11] nvmet: Optionally use PCI P2P memory

2018-03-21 Thread Christoph Hellwig
> +   const char *page, size_t count)
> +{
> + struct nvmet_port *port = to_nvmet_port(item);
> + struct device *dev;
> + struct pci_dev *p2p_dev = NULL;
> + bool use_p2pmem;
> +
> + switch (page[0]) {
> + case 'y':
> + case 'Y':
> + case 'a':
> + case 'A':
> + use_p2pmem = true;
> + break;
> + case 'n':
> + case 'N':
> + use_p2pmem = false;
> + break;
> + default:
> + dev = bus_find_device_by_name(&pci_bus_type, NULL, page);
> + if (!dev) {
> + pr_err("No such PCI device: %s\n", page);
> + return -ENODEV;
> + }
> +
> + use_p2pmem = true;
> + p2p_dev = to_pci_dev(dev);
> +
> + if (!pci_has_p2pmem(p2p_dev)) {
> + pr_err("PCI device has no peer-to-peer memory: %s\n",
> +page);
> + pci_dev_put(p2p_dev);
> + return -ENODEV;
> + }
> + }

Yikes.  Shouldn't auto just be the normal yes case instead of this
string parsing mess?

> + if (rsp->req.sg != &rsp->cmd->inline_sg) {
> + if (rsp->req.p2p_dev)
> + pci_p2pmem_free_sgl(rsp->req.p2p_dev, rsp->req.sg,
> + rsp->req.sg_cnt);
> + else
> + sgl_free(rsp->req.sg);
> + }

Can we factor this into a helper, as the other target drivers (fc for now,
tcp soon) using sgl allocations should share the code?

(same for the alloc side)



Re: [PATCH v3 11/11] nvmet: Optionally use PCI P2P memory

2018-03-21 Thread Logan Gunthorpe


On 21/03/18 03:27 AM, Christoph Hellwig wrote:
>> +  const char *page, size_t count)
>> +{
>> +struct nvmet_port *port = to_nvmet_port(item);
>> +struct device *dev;
>> +struct pci_dev *p2p_dev = NULL;
>> +bool use_p2pmem;
>> +
>> +switch (page[0]) {
>> +case 'y':
>> +case 'Y':
>> +case 'a':
>> +case 'A':
>> +use_p2pmem = true;
>> +break;
>> +case 'n':
>> +case 'N':
>> +use_p2pmem = false;
>> +break;
>> +default:
>> +dev = bus_find_device_by_name(&pci_bus_type, NULL, page);
>> +if (!dev) {
>> +pr_err("No such PCI device: %s\n", page);
>> +return -ENODEV;
>> +}
>> +
>> +use_p2pmem = true;
>> +p2p_dev = to_pci_dev(dev);
>> +
>> +if (!pci_has_p2pmem(p2p_dev)) {
>> +pr_err("PCI device has no peer-to-peer memory: %s\n",
>> +   page);
>> +pci_dev_put(p2p_dev);
>> +return -ENODEV;
>> +}
>> +}
> 
> Yikes.  Shouldn't auto just be the normal yes case instead of this
> string parsing mess?

Sorry, I don't follow. The code, as is, should automatically select the
device if the user sets it to "yes" or "auto" or "y" or similar.
(Roughly similar to how kstrtobool() works, except '0' or '1' are not
accepted seeing they could overlap with PCI device names). In other
cases, it looks for the specific PCI device name to use exactly.

Are you saying it shouldn't work this way or are you saying the code to
implement it is too messy?

>> +if (rsp->req.sg != &rsp->cmd->inline_sg) {
>> +if (rsp->req.p2p_dev)
>> +pci_p2pmem_free_sgl(rsp->req.p2p_dev, rsp->req.sg,
>> +rsp->req.sg_cnt);
>> +else
>> +sgl_free(rsp->req.sg);
>> +}
> 
> Can we factor this into a helper, as the other target drivers (fc for now,
> tcp soon) using sgl allocations should share the code?
> 
> (same for the alloc side)

Sure. Would the helpers belong in core.c?

Thanks,

Logan