On 2021-09-24 01:20, Stefan Agner wrote: > So far we've been content with passing physical/CPU addresses when > configuring memory addresses into NVMe controllers, but not all > platforms have buses with transparent mappings. Specifically the > Raspberry Pi 4 might introduce an offset to memory accesses incoming > from its PCIe port. > > Introduce nvme_virt_to_bus() and nvme_bus_to_virt() to cater with these > limitations, and make sure we don't break non DM users. > For devices where PCIe's view of host memory doesn't match the memory > as seen by the CPU. > > A similar change has been introduced for XHCI controller with > commit 1a474559d90a ("xhci: translate virtual addresses into the bus's > address space"). > > Signed-off-by: Stefan Agner <ste...@agner.ch> > --- > > drivers/nvme/nvme.c | 32 ++++++++++++++++++-------------- > drivers/nvme/nvme.h | 15 +++++++++++++++ > 2 files changed, 33 insertions(+), 14 deletions(-) > > diff --git a/drivers/nvme/nvme.c b/drivers/nvme/nvme.c > index 4c4dc7cc4d..0b7082d71b 100644 > --- a/drivers/nvme/nvme.c > +++ b/drivers/nvme/nvme.c > @@ -95,7 +95,7 @@ static int nvme_setup_prps(struct nvme_dev *dev, u64 *prp2, > buffer += (page_size - offset); > > if (length <= page_size) { > - *prp2 = (u64)buffer; > + *prp2 = nvme_virt_to_bus(dev, buffer); > return 0; > } > > @@ -120,16 +120,16 @@ static int nvme_setup_prps(struct nvme_dev *dev, > u64 *prp2, > i = 0; > while (nprps) { > if (i == prps_per_page) { > - u64 next_prp_list = (u64)prp_pool + page_size; > - *(prp_pool + i) = cpu_to_le64(next_prp_list); > + u64 next = nvme_virt_to_bus(dev, prp_pool + page_size); > + *(prp_pool + i) = cpu_to_le64(next); > i = 0; > prp_pool += page_size; > } > - *(prp_pool + i++) = cpu_to_le64((u64)buffer); > + *(prp_pool + i++) = cpu_to_le64(nvme_virt_to_bus(dev, buffer)); > buffer += page_size; > nprps--; > } > - *prp2 = (u64)dev->prp_pool; > + *prp2 = nvme_virt_to_bus(dev, dev->prp_pool); > > flush_dcache_range((ulong)dev->prp_pool, (ulong)dev->prp_pool + > dev->prp_entry_num * sizeof(u64)); > @@ -356,6 +356,7 @@ static int nvme_configure_admin_queue(struct nvme_dev > *dev) > int result; > u32 aqa; > u64 cap = dev->cap; > + u64 dma_addr; > struct nvme_queue *nvmeq; > /* most architectures use 4KB as the page size */ > unsigned page_shift = 12; > @@ -396,8 +397,10 @@ static int nvme_configure_admin_queue(struct nvme_dev > *dev) > dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES; > > writel(aqa, &dev->bar->aqa); > - nvme_writeq((ulong)nvmeq->sq_cmds, &dev->bar->asq); > - nvme_writeq((ulong)nvmeq->cqes, &dev->bar->acq); > + dma_addr = nvme_virt_to_bus(dev, nvmeq->sq_cmds); > + nvme_writeq(dma_addr, &dev->bar->asq); > + dma_addr = nvme_virt_to_bus(dev, nvmeq->cqes); > + nvme_writeq(dma_addr, &dev->bar->acq); > > result = nvme_enable_ctrl(dev); > if (result) > @@ -423,7 +426,7 @@ static int nvme_alloc_cq(struct nvme_dev *dev, u16 qid, > > memset(&c, 0, sizeof(c)); > c.create_cq.opcode = nvme_admin_create_cq; > - c.create_cq.prp1 = cpu_to_le64((ulong)nvmeq->cqes); > + c.create_cq.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, nvmeq->cqes)); > c.create_cq.cqid = cpu_to_le16(qid); > c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1); > c.create_cq.cq_flags = cpu_to_le16(flags); > @@ -440,7 +443,7 @@ static int nvme_alloc_sq(struct nvme_dev *dev, u16 qid, > > memset(&c, 0, sizeof(c)); > c.create_sq.opcode = nvme_admin_create_sq; > - c.create_sq.prp1 = cpu_to_le64((ulong)nvmeq->sq_cmds); > + c.create_sq.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, nvmeq->sq_cmds)); > c.create_sq.sqid = cpu_to_le16(qid); > c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1); > c.create_sq.sq_flags = cpu_to_le16(flags); > @@ -461,14 +464,14 @@ int nvme_identify(struct nvme_dev *dev, unsigned nsid, > memset(&c, 0, sizeof(c)); > c.identify.opcode = nvme_admin_identify; > c.identify.nsid = cpu_to_le32(nsid); > - c.identify.prp1 = cpu_to_le64((u64)buffer); > + c.identify.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, buffer)); > > length -= (page_size - offset); > if (length <= 0) { > c.identify.prp2 = 0; > } else { > buffer += (page_size - offset); > - c.identify.prp2 = cpu_to_le64((u64)buffer); > + c.identify.prp2 = cpu_to_le64(nvme_virt_to_bus(dev, buffer)); > } > > c.identify.cns = cpu_to_le32(cns); > @@ -493,7 +496,7 @@ int nvme_get_features(struct nvme_dev *dev, > unsigned fid, unsigned nsid, > memset(&c, 0, sizeof(c)); > c.features.opcode = nvme_admin_get_features; > c.features.nsid = cpu_to_le32(nsid); > - c.features.prp1 = cpu_to_le64((u64)buffer); > + c.features.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, buffer)); > c.features.fid = cpu_to_le32(fid); > > ret = nvme_submit_admin_cmd(dev, &c, result); > @@ -519,7 +522,7 @@ int nvme_set_features(struct nvme_dev *dev, > unsigned fid, unsigned dword11, > > memset(&c, 0, sizeof(c)); > c.features.opcode = nvme_admin_set_features; > - c.features.prp1 = cpu_to_le64((u64)buffer); > + c.features.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, buffer)); > c.features.fid = cpu_to_le32(fid); > c.features.dword11 = cpu_to_le32(dword11); > > @@ -775,7 +778,7 @@ static ulong nvme_blk_rw(struct udevice *udev, > lbaint_t blknr, > c.rw.slba = cpu_to_le64(slba); > slba += lbas; > c.rw.length = cpu_to_le16(lbas - 1); > - c.rw.prp1 = cpu_to_le64((ulong)buffer); > + c.rw.prp1 = cpu_to_le64(nvme_virt_to_bus(dev, buffer)); > c.rw.prp2 = cpu_to_le64(prp2); > status = nvme_submit_sync_cmd(dev->queues[NVME_IO_Q], > &c, NULL, IO_TIMEOUT); > @@ -834,6 +837,7 @@ static int nvme_probe(struct udevice *udev) > struct nvme_id_ns *id; > > ndev->instance = trailing_strtol(udev->name); > + ndev->dev = udev->parent;
It only translates addresses correctly once I chose the parent device. I am pretty sure that this is not the right way to fix it. Shouldn't the child automatically assume that the same translation is required? -- Stefan > > INIT_LIST_HEAD(&ndev->namespaces); > ndev->bar = dm_pci_map_bar(udev, PCI_BASE_ADDRESS_0, > diff --git a/drivers/nvme/nvme.h b/drivers/nvme/nvme.h > index c6aae4da5d..31e6899bca 100644 > --- a/drivers/nvme/nvme.h > +++ b/drivers/nvme/nvme.h > @@ -7,8 +7,15 @@ > #ifndef __DRIVER_NVME_H__ > #define __DRIVER_NVME_H__ > > +#include <phys2bus.h> > #include <asm/io.h> > > +#if CONFIG_IS_ENABLED(DM_USB) > +#define nvme_to_dev(_dev) _dev->dev > +#else > +#define nvme_to_dev(_dev) NULL > +#endif > + > struct nvme_id_power_state { > __le16 max_power; /* centiwatts */ > __u8 rsvd2; > @@ -596,6 +603,9 @@ enum { > > /* Represents an NVM Express device. Each nvme_dev is a PCI function. */ > struct nvme_dev { > +#if CONFIG_IS_ENABLED(DM_USB) > + struct udevice *dev; > +#endif > struct list_head node; > struct nvme_queue **queues; > u32 __iomem *dbs; > @@ -635,4 +645,9 @@ struct nvme_ns { > u8 flbas; > }; > > +static inline dma_addr_t nvme_virt_to_bus(struct nvme_dev *dev, void *addr) > +{ > + return dev_phys_to_bus(nvme_to_dev(dev), virt_to_phys(addr)); > +} > + > #endif /* __DRIVER_NVME_H__ */