Re: [RFC PATCH 4/4] nvme: translate virtual addresses into the bus's address space

2021-09-23 Thread Stefan Agner
On 2021-09-24 01:20, Stefan Agner wrote:
> So far we've been content with passing physical/CPU addresses when
> configuring memory addresses into NVMe controllers, but not all
> platforms have buses with transparent mappings. Specifically the
> Raspberry Pi 4 might introduce an offset to memory accesses incoming
> from its PCIe port.
> 
> Introduce nvme_virt_to_bus() and nvme_bus_to_virt() to cater for these
> limitations — i.e. for devices where PCIe's view of host memory doesn't
> match the memory as seen by the CPU — and make sure we don't break
> non-DM users.
> 
> A similar change has been introduced for XHCI controller with
> commit 1a474559d90a ("xhci: translate virtual addresses into the bus's
> address space").
> 
> Signed-off-by: Stefan Agner 
> ---
> 
>  drivers/nvme/nvme.c | 32 ++--
>  drivers/nvme/nvme.h | 15 +++
>  2 files changed, 33 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/nvme/nvme.c b/drivers/nvme/nvme.c
> index 4c4dc7cc4d..0b7082d71b 100644
> --- a/drivers/nvme/nvme.c
> +++ b/drivers/nvme/nvme.c
> @@ -95,7 +95,7 @@ static int nvme_setup_prps(struct nvme_dev *dev, u64 *prp2,
>   buffer += (page_size - offset);
>  
>   if (length <= page_size) {
> - *prp2 = (u64)buffer;
> + *prp2 = nvme_virt_to_bus(dev, buffer);
>   return 0;
>   }
>  
> @@ -120,16 +120,16 @@ static int nvme_setup_prps(struct nvme_dev *dev,
> u64 *prp2,
>   i = 0;
>   while (nprps) {
>   if (i == prps_per_page) {
> - u64 next_prp_list = (u64)prp_pool + page_size;
> - *(prp_pool + i) = cpu_to_le64(next_prp_list);
> + u64 next = nvme_virt_to_bus(dev, prp_pool + page_size);
> + *(prp_pool + i) = cpu_to_le64(next);
>   i = 0;
>   prp_pool += page_size;
>   }
> - *(prp_pool + i++) = cpu_to_le64((u64)buffer);
> + *(prp_pool + i++) = cpu_to_le64(nvme_virt_to_bus(dev, buffer));
>   buffer += page_size;
>   nprps--;
>   }
> - *prp2 = (u64)dev->prp_pool;
> + *prp2 = nvme_virt_to_bus(dev, dev->prp_pool);
>  
>   flush_dcache_range((ulong)dev->prp_pool, (ulong)dev->prp_pool +
>  dev->prp_entry_num * sizeof(u64));
> @@ -356,6 +356,7 @@ static int nvme_configure_admin_queue(struct nvme_dev 
> *dev)
>   int result;
>   u32 aqa;
>   u64 cap = dev->cap;
> + u64 dma_addr;
>   struct nvme_queue *nvmeq;
>   /* most architectures use 4KB as the page size */
>   unsigned page_shift = 12;
> @@ -396,8 +397,10 @@ static int nvme_configure_admin_queue(struct nvme_dev 
> *dev)
>   dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
>  
>   writel(aqa, &dev->bar->aqa);
> - nvme_writeq((ulong)nvmeq->sq_cmds, &dev->bar->asq);
> - nvme_writeq((ulong)nvmeq->cqes, &dev->bar->acq);
> + dma_addr = nvme_virt_to_bus(dev, nvmeq->sq_cmds);
> + nvme_writeq(dma_addr, &dev->bar->asq);
> + dma_addr = nvme_virt_to_bus(dev, nvmeq->cqes);
> + nvme_writeq(dma_addr, &dev->bar->acq);
>  
>   result = nvme_enable_ctrl(dev);
>   if (result)
> @@ -423,7 +426,7 @@ static int nvme_alloc_cq(struct nvme_dev *dev, u16 qid,
>  
>   memset(&c, 0, sizeof(c));
>   c.create_cq.opcode = nvme_admin_create_cq;
> - c.create_cq.prp1 = cpu_to_le64((ulong)nvmeq->cqes);
> + c.create_cq.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, nvmeq->cqes));
>   c.create_cq.cqid = cpu_to_le16(qid);
>   c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
>   c.create_cq.cq_flags = cpu_to_le16(flags);
> @@ -440,7 +443,7 @@ static int nvme_alloc_sq(struct nvme_dev *dev, u16 qid,
>  
>   memset(&c, 0, sizeof(c));
>   c.create_sq.opcode = nvme_admin_create_sq;
> - c.create_sq.prp1 = cpu_to_le64((ulong)nvmeq->sq_cmds);
> + c.create_sq.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, nvmeq->sq_cmds));
>   c.create_sq.sqid = cpu_to_le16(qid);
>   c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
>   c.create_sq.sq_flags = cpu_to_le16(flags);
> @@ -461,14 +464,14 @@ int nvme_identify(struct nvme_dev *dev, unsigned nsid,
>   memset(&c, 0, sizeof(c));
>   c.identify.opcode = nvme_admin_identify;
>   c.identify.nsid = cpu_to_le32(nsid);
> - c.identify.prp1 = cpu_to_le64((u64)buffer);
> + c.identify.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, buffer));
>  
>   length -= (page_size - offset);
>   if (length <= 0) {
>   c.identify.prp2 = 0;
>   } else {
>   buffer += (page_size - offset);
> - c.identify.prp2 = cpu_to_le64((u64)buffer);
> + c.identify.prp2 = cpu_to_le64(nvme_virt_to_bus(dev, buffer));
>   }
>  
>   c.identify.cns = cpu_to_le32(cns);
> @@ -493,7 +496,7 @@ int nvme_get_features(struct nvme_dev *dev,
> unsigned fid, unsigned nsid,
>   memset(&c, 0, sizeof(c));
>   c.features.opcode = nvme_admin_get_features;

[RFC PATCH 4/4] nvme: translate virtual addresses into the bus's address space

2021-09-23 Thread Stefan Agner
So far we've been content with passing physical/CPU addresses when
configuring memory addresses into NVMe controllers, but not all
platforms have buses with transparent mappings. Specifically the
Raspberry Pi 4 might introduce an offset to memory accesses incoming
from its PCIe port.

Introduce nvme_virt_to_bus() and nvme_bus_to_virt() to cater for these
limitations — i.e. for devices where PCIe's view of host memory doesn't
match the memory as seen by the CPU — and make sure we don't break
non-DM users.

A similar change has been introduced for XHCI controller with
commit 1a474559d90a ("xhci: translate virtual addresses into the bus's
address space").

Signed-off-by: Stefan Agner 
---

 drivers/nvme/nvme.c | 32 ++--
 drivers/nvme/nvme.h | 15 +++
 2 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/drivers/nvme/nvme.c b/drivers/nvme/nvme.c
index 4c4dc7cc4d..0b7082d71b 100644
--- a/drivers/nvme/nvme.c
+++ b/drivers/nvme/nvme.c
@@ -95,7 +95,7 @@ static int nvme_setup_prps(struct nvme_dev *dev, u64 *prp2,
buffer += (page_size - offset);
 
if (length <= page_size) {
-   *prp2 = (u64)buffer;
+   *prp2 = nvme_virt_to_bus(dev, buffer);
return 0;
}
 
@@ -120,16 +120,16 @@ static int nvme_setup_prps(struct nvme_dev *dev, u64 
*prp2,
i = 0;
while (nprps) {
if (i == prps_per_page) {
-   u64 next_prp_list = (u64)prp_pool + page_size;
-   *(prp_pool + i) = cpu_to_le64(next_prp_list);
+   u64 next = nvme_virt_to_bus(dev, prp_pool + page_size);
+   *(prp_pool + i) = cpu_to_le64(next);
i = 0;
prp_pool += page_size;
}
-   *(prp_pool + i++) = cpu_to_le64((u64)buffer);
+   *(prp_pool + i++) = cpu_to_le64(nvme_virt_to_bus(dev, buffer));
buffer += page_size;
nprps--;
}
-   *prp2 = (u64)dev->prp_pool;
+   *prp2 = nvme_virt_to_bus(dev, dev->prp_pool);
 
flush_dcache_range((ulong)dev->prp_pool, (ulong)dev->prp_pool +
   dev->prp_entry_num * sizeof(u64));
@@ -356,6 +356,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
int result;
u32 aqa;
u64 cap = dev->cap;
+   u64 dma_addr;
struct nvme_queue *nvmeq;
/* most architectures use 4KB as the page size */
unsigned page_shift = 12;
@@ -396,8 +397,10 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
 
	writel(aqa, &dev->bar->aqa);
-	nvme_writeq((ulong)nvmeq->sq_cmds, &dev->bar->asq);
-	nvme_writeq((ulong)nvmeq->cqes, &dev->bar->acq);
+	dma_addr = nvme_virt_to_bus(dev, nvmeq->sq_cmds);
+	nvme_writeq(dma_addr, &dev->bar->asq);
+	dma_addr = nvme_virt_to_bus(dev, nvmeq->cqes);
+	nvme_writeq(dma_addr, &dev->bar->acq);
 
result = nvme_enable_ctrl(dev);
if (result)
@@ -423,7 +426,7 @@ static int nvme_alloc_cq(struct nvme_dev *dev, u16 qid,
 
	memset(&c, 0, sizeof(c));
c.create_cq.opcode = nvme_admin_create_cq;
-   c.create_cq.prp1 = cpu_to_le64((ulong)nvmeq->cqes);
+   c.create_cq.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, nvmeq->cqes));
c.create_cq.cqid = cpu_to_le16(qid);
c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
c.create_cq.cq_flags = cpu_to_le16(flags);
@@ -440,7 +443,7 @@ static int nvme_alloc_sq(struct nvme_dev *dev, u16 qid,
 
	memset(&c, 0, sizeof(c));
c.create_sq.opcode = nvme_admin_create_sq;
-   c.create_sq.prp1 = cpu_to_le64((ulong)nvmeq->sq_cmds);
+   c.create_sq.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, nvmeq->sq_cmds));
c.create_sq.sqid = cpu_to_le16(qid);
c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
c.create_sq.sq_flags = cpu_to_le16(flags);
@@ -461,14 +464,14 @@ int nvme_identify(struct nvme_dev *dev, unsigned nsid,
	memset(&c, 0, sizeof(c));
c.identify.opcode = nvme_admin_identify;
c.identify.nsid = cpu_to_le32(nsid);
-   c.identify.prp1 = cpu_to_le64((u64)buffer);
+   c.identify.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, buffer));
 
length -= (page_size - offset);
if (length <= 0) {
c.identify.prp2 = 0;
} else {
buffer += (page_size - offset);
-   c.identify.prp2 = cpu_to_le64((u64)buffer);
+   c.identify.prp2 = cpu_to_le64(nvme_virt_to_bus(dev, buffer));
}
 
c.identify.cns = cpu_to_le32(cns);
@@ -493,7 +496,7 @@ int nvme_get_features(struct nvme_dev *dev, unsigned fid, 
unsigned nsid,
	memset(&c, 0, sizeof(c));
c.features.opcode = nvme_admin_get_features;
c.features.nsid = cpu_to_le32(nsid);
-   c.features.prp1 = cpu_to_le64((u64)buffer);
+