On Fri, Jan 18, 2019 at 05:25:20PM -0700, Dave Jiang wrote:
> 
> 
> On 1/18/19 5:10 PM, Logan Gunthorpe wrote:
> > Presently, when ntb_transport is used with DMA and the IOMMU turned on,
> > it fails with errors from the IOMMU such as:
> > 
> >   DMAR: DRHD: handling fault status reg 202
> >   DMAR: [DMA Write] Request device [00:04.0] fault addr
> >     381fc0340000 [fault reason 05] PTE Write access is not set
> > 
> > This is because ntb_transport does not map the BAR space with the IOMMU.
> > 
> > To fix this, we map the entire MW region for each QP after we assign
> > the DMA channel. This prevents needing an extra DMA map in the fast
> > path.
> > 
> > Link: 
> > https://lore.kernel.org/linux-pci/499934e7-3734-1aee-37dd-b42a5d2a2...@intel.com/
> > Signed-off-by: Logan Gunthorpe <log...@deltatee.com>
> > Cc: Jon Mason <jdma...@kudzu.us>
> > Cc: Dave Jiang <dave.ji...@intel.com>
> > Cc: Allen Hubbe <alle...@gmail.com>
> 
> Nice! I actually never encountered this on the Intel NTB with IOMMU on.
> It also could be that the Intel BIOS already took care of it for all
> embedded device BARs on the uncore. Nevertheless it's needed. Thanks!
> 
> Reviewed-by: Dave Jiang <dave.ji...@intel.com>

Added to the ntb branch, thanks!

> 
> > ---
> >  drivers/ntb/ntb_transport.c | 28 ++++++++++++++++++++++++++--
> >  1 file changed, 26 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
> > index 3bfdb4562408..526b65afc16a 100644
> > --- a/drivers/ntb/ntb_transport.c
> > +++ b/drivers/ntb/ntb_transport.c
> > @@ -144,7 +144,9 @@ struct ntb_transport_qp {
> >     struct list_head tx_free_q;
> >     spinlock_t ntb_tx_free_q_lock;
> >     void __iomem *tx_mw;
> > -   dma_addr_t tx_mw_phys;
> > +   phys_addr_t tx_mw_phys;
> > +   size_t tx_mw_size;
> > +   dma_addr_t tx_mw_dma_addr;
> >     unsigned int tx_index;
> >     unsigned int tx_max_entry;
> >     unsigned int tx_max_frame;
> > @@ -1049,6 +1051,7 @@ static int ntb_transport_init_queue(struct 
> > ntb_transport_ctx *nt,
> >     tx_size = (unsigned int)mw_size / num_qps_mw;
> >     qp_offset = tx_size * (qp_num / mw_count);
> >  
> > +   qp->tx_mw_size = tx_size;
> >     qp->tx_mw = nt->mw_vec[mw_num].vbase + qp_offset;
> >     if (!qp->tx_mw)
> >             return -EINVAL;
> > @@ -1644,7 +1647,7 @@ static int ntb_async_tx_submit(struct 
> > ntb_transport_qp *qp,
> >     dma_cookie_t cookie;
> >  
> >     device = chan->device;
> > -   dest = qp->tx_mw_phys + qp->tx_max_frame * entry->tx_index;
> > +   dest = qp->tx_mw_dma_addr + qp->tx_max_frame * entry->tx_index;
> >     buff_off = (size_t)buf & ~PAGE_MASK;
> >     dest_off = (size_t)dest & ~PAGE_MASK;
> >  
> > @@ -1863,6 +1866,18 @@ ntb_transport_create_queue(void *data, struct device 
> > *client_dev,
> >             qp->rx_dma_chan = NULL;
> >     }
> >  
> > +   if (qp->tx_dma_chan) {
> > +           qp->tx_mw_dma_addr =
> > +                   dma_map_resource(qp->tx_dma_chan->device->dev,
> > +                                    qp->tx_mw_phys, qp->tx_mw_size,
> > +                                    DMA_FROM_DEVICE, 0);
> > +           if (dma_mapping_error(qp->tx_dma_chan->device->dev,
> > +                                 qp->tx_mw_dma_addr)) {
> > +                   qp->tx_mw_dma_addr = 0;
> > +                   goto err1;
> > +           }
> > +   }
> > +
> >     dev_dbg(&pdev->dev, "Using %s memcpy for TX\n",
> >             qp->tx_dma_chan ? "DMA" : "CPU");
> >  
> > @@ -1904,6 +1919,10 @@ ntb_transport_create_queue(void *data, struct device 
> > *client_dev,
> >     qp->rx_alloc_entry = 0;
> >     while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q)))
> >             kfree(entry);
> > +   if (qp->tx_mw_dma_addr)
> > +           dma_unmap_resource(qp->tx_dma_chan->device->dev,
> > +                              qp->tx_mw_dma_addr, qp->tx_mw_size,
> > +                              DMA_FROM_DEVICE, 0);
> >     if (qp->tx_dma_chan)
> >             dma_release_channel(qp->tx_dma_chan);
> >     if (qp->rx_dma_chan)
> > @@ -1945,6 +1964,11 @@ void ntb_transport_free_queue(struct 
> > ntb_transport_qp *qp)
> >              */
> >             dma_sync_wait(chan, qp->last_cookie);
> >             dmaengine_terminate_all(chan);
> > +
> > +           dma_unmap_resource(chan->device->dev,
> > +                              qp->tx_mw_dma_addr, qp->tx_mw_size,
> > +                              DMA_FROM_DEVICE, 0);
> > +
> >             dma_release_channel(chan);
> >     }
> >  
> > 

Reply via email to