Re: [PATCH v9 12/32] drm: msm: fix common struct sg_table related issues

2020-09-01 Thread Rob Clark
On Tue, Sep 1, 2020 at 12:14 PM Robin Murphy  wrote:
>
> On 2020-08-26 07:32, Marek Szyprowski wrote:
> > The Documentation/DMA-API-HOWTO.txt states that the dma_map_sg() function
> > returns the number of entries created in the DMA address space. However,
> > the subsequent calls to dma_sync_sg_for_{device,cpu}() and dma_unmap_sg()
> > must be made with the original number of entries passed to dma_map_sg().
> >
> > struct sg_table is a common structure used for describing a non-contiguous
> > memory buffer, used widely in the DRM and graphics subsystems. It consists
> > of a scatterlist with memory pages and DMA addresses (the sgl entry), as
> > well as the numbers of scatterlist entries: CPU pages (the orig_nents
> > entry) and DMA-mapped pages (the nents entry).
> >
> > It turned out that it was a common mistake to misuse the nents and
> > orig_nents entries, calling DMA-mapping functions with the wrong number of
> > entries or ignoring the number of mapped entries returned by the
> > dma_map_sg() function.
> >
> > To avoid such issues, let's use the common dma-mapping wrappers operating
> > directly on struct sg_table objects and use scatterlist page iterators
> > where possible. This almost always hides references to the nents and
> > orig_nents entries, making the code robust, easier to follow and
> > copy/paste-safe.
> >
> > Signed-off-by: Marek Szyprowski 
> > Acked-by: Rob Clark 
> > ---
> >   drivers/gpu/drm/msm/msm_gem.c| 13 +
> >   drivers/gpu/drm/msm/msm_gpummu.c | 14 ++
> >   drivers/gpu/drm/msm/msm_iommu.c  |  2 +-
> >   3 files changed, 12 insertions(+), 17 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
> > index b2f49152b4d4..8c7ae812b813 100644
> > --- a/drivers/gpu/drm/msm/msm_gem.c
> > +++ b/drivers/gpu/drm/msm/msm_gem.c
> > @@ -53,11 +53,10 @@ static void sync_for_device(struct msm_gem_object *msm_obj)
> >   struct device *dev = msm_obj->base.dev->dev;
> >
> >   if (get_dma_ops(dev) && IS_ENABLED(CONFIG_ARM64)) {
> > - dma_sync_sg_for_device(dev, msm_obj->sgt->sgl,
> > - msm_obj->sgt->nents, DMA_BIDIRECTIONAL);
> > + dma_sync_sgtable_for_device(dev, msm_obj->sgt,
> > + DMA_BIDIRECTIONAL);
> >   } else {
> > - dma_map_sg(dev, msm_obj->sgt->sgl,
> > - msm_obj->sgt->nents, DMA_BIDIRECTIONAL);
> > + dma_map_sgtable(dev, msm_obj->sgt, DMA_BIDIRECTIONAL, 0);
> >   }
> >   }
> >
> > @@ -66,11 +65,9 @@ static void sync_for_cpu(struct msm_gem_object *msm_obj)
> >   struct device *dev = msm_obj->base.dev->dev;
> >
> >   if (get_dma_ops(dev) && IS_ENABLED(CONFIG_ARM64)) {
> > - dma_sync_sg_for_cpu(dev, msm_obj->sgt->sgl,
> > - msm_obj->sgt->nents, DMA_BIDIRECTIONAL);
> > + dma_sync_sgtable_for_cpu(dev, msm_obj->sgt, DMA_BIDIRECTIONAL);
> >   } else {
> > - dma_unmap_sg(dev, msm_obj->sgt->sgl,
> > - msm_obj->sgt->nents, DMA_BIDIRECTIONAL);
> > + dma_unmap_sgtable(dev, msm_obj->sgt, DMA_BIDIRECTIONAL, 0);
> >   }
> >   }
> >
> > diff --git a/drivers/gpu/drm/msm/msm_gpummu.c b/drivers/gpu/drm/msm/msm_gpummu.c
> > index 310a31b05faa..319f06c28235 100644
> > --- a/drivers/gpu/drm/msm/msm_gpummu.c
> > +++ b/drivers/gpu/drm/msm/msm_gpummu.c
> > @@ -30,21 +30,19 @@ static int msm_gpummu_map(struct msm_mmu *mmu, uint64_t iova,
> >   {
> >   struct msm_gpummu *gpummu = to_msm_gpummu(mmu);
> >   unsigned idx = (iova - GPUMMU_VA_START) / GPUMMU_PAGE_SIZE;
> > - struct scatterlist *sg;
> > + struct sg_dma_page_iter dma_iter;
> >   unsigned prot_bits = 0;
> > - unsigned i, j;
> >
> >   if (prot & IOMMU_WRITE)
> >   prot_bits |= 1;
> >   if (prot & IOMMU_READ)
> >   prot_bits |= 2;
> >
> > - for_each_sg(sgt->sgl, sg, sgt->nents, i) {
> > - dma_addr_t addr = sg->dma_address;
> > - for (j = 0; j < sg->length / GPUMMU_PAGE_SIZE; j++, idx++) {
> > - gpummu->table[idx] = addr | prot_bits;
> > - addr += GPUMMU_PAGE_SIZE;
> > - }
> > + for_each_sgtable_dma_page(sgt, &dma_iter, 0) {
> > + dma_addr_t addr = sg_page_iter_dma_address(&dma_iter);
> > +
> > + BUILD_BUG_ON(GPUMMU_PAGE_SIZE != PAGE_SIZE);
> > + gpummu->table[idx++] = addr | prot_bits;
>
> Given that the BUILD_BUG_ON might prevent valid arm64 configs from
> building, how about a simple tweak like:
>
> 	for (i = 0; i < PAGE_SIZE; i += GPUMMU_PAGE_SIZE)
> 		gpummu->table[idx++] = i + addr | prot_bits;
> ?
>
> Or alternatively perhaps some more aggressive #ifdefs or makefile tweaks
> to prevent the GPUMMU code building for arm64 at all if it's only
> relevant to 32-bit platforms (which I believe might be the case).
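
Coming back to the rationale quoted at the top of the thread: the point of
the sgtable wrappers is that the nents/orig_nents bookkeeping stays inside
struct sg_table. A simplified sketch of what dma_map_sgtable() and
dma_unmap_sgtable() boil down to (approximate, based on the generic
dma-mapping helpers this series relies on, not a verbatim copy of the
headers):

	static inline int dma_map_sgtable(struct device *dev, struct sg_table *sgt,
					  enum dma_data_direction dir, unsigned long attrs)
	{
		int nents;

		/* Map the CPU entries and remember how many DMA entries were created. */
		nents = dma_map_sg_attrs(dev, sgt->sgl, sgt->orig_nents, dir, attrs);
		if (nents <= 0)
			return -EINVAL;
		sgt->nents = nents;
		return 0;
	}

	static inline void dma_unmap_sgtable(struct device *dev, struct sg_table *sgt,
					     enum dma_data_direction dir, unsigned long attrs)
	{
		/* Unmapping always takes the original entry count, never sgt->nents. */
		dma_unmap_sg_attrs(dev, sgt->sgl, sgt->orig_nents, dir, attrs);
	}

Callers such as sync_for_device()/sync_for_cpu() above then never touch
nents or orig_nents directly, which is what makes the converted code
copy/paste-safe.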


Re: [PATCH v9 12/32] drm: msm: fix common struct sg_table related issues

2020-09-01 Thread Robin Murphy

On 2020-08-26 07:32, Marek Szyprowski wrote:

The Documentation/DMA-API-HOWTO.txt states that the dma_map_sg() function
returns the number of entries created in the DMA address space. However,
the subsequent calls to dma_sync_sg_for_{device,cpu}() and dma_unmap_sg()
must be made with the original number of entries passed to dma_map_sg().

struct sg_table is a common structure used for describing a non-contiguous
memory buffer, used widely in the DRM and graphics subsystems. It consists
of a scatterlist with memory pages and DMA addresses (the sgl entry), as
well as the numbers of scatterlist entries: CPU pages (the orig_nents
entry) and DMA-mapped pages (the nents entry).

It turned out that it was a common mistake to misuse the nents and
orig_nents entries, calling DMA-mapping functions with the wrong number of
entries or ignoring the number of mapped entries returned by the
dma_map_sg() function.

To avoid such issues, let's use the common dma-mapping wrappers operating
directly on struct sg_table objects and use scatterlist page iterators
where possible. This almost always hides references to the nents and
orig_nents entries, making the code robust, easier to follow and
copy/paste-safe.

Signed-off-by: Marek Szyprowski 
Acked-by: Rob Clark 
---
  drivers/gpu/drm/msm/msm_gem.c| 13 +
  drivers/gpu/drm/msm/msm_gpummu.c | 14 ++
  drivers/gpu/drm/msm/msm_iommu.c  |  2 +-
  3 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index b2f49152b4d4..8c7ae812b813 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -53,11 +53,10 @@ static void sync_for_device(struct msm_gem_object *msm_obj)
struct device *dev = msm_obj->base.dev->dev;
  
  	if (get_dma_ops(dev) && IS_ENABLED(CONFIG_ARM64)) {

-   dma_sync_sg_for_device(dev, msm_obj->sgt->sgl,
-   msm_obj->sgt->nents, DMA_BIDIRECTIONAL);
+   dma_sync_sgtable_for_device(dev, msm_obj->sgt,
+   DMA_BIDIRECTIONAL);
} else {
-   dma_map_sg(dev, msm_obj->sgt->sgl,
-   msm_obj->sgt->nents, DMA_BIDIRECTIONAL);
+   dma_map_sgtable(dev, msm_obj->sgt, DMA_BIDIRECTIONAL, 0);
}
  }
  
@@ -66,11 +65,9 @@ static void sync_for_cpu(struct msm_gem_object *msm_obj)

struct device *dev = msm_obj->base.dev->dev;
  
  	if (get_dma_ops(dev) && IS_ENABLED(CONFIG_ARM64)) {

-   dma_sync_sg_for_cpu(dev, msm_obj->sgt->sgl,
-   msm_obj->sgt->nents, DMA_BIDIRECTIONAL);
+   dma_sync_sgtable_for_cpu(dev, msm_obj->sgt, DMA_BIDIRECTIONAL);
} else {
-   dma_unmap_sg(dev, msm_obj->sgt->sgl,
-   msm_obj->sgt->nents, DMA_BIDIRECTIONAL);
+   dma_unmap_sgtable(dev, msm_obj->sgt, DMA_BIDIRECTIONAL, 0);
}
  }
  
diff --git a/drivers/gpu/drm/msm/msm_gpummu.c b/drivers/gpu/drm/msm/msm_gpummu.c

index 310a31b05faa..319f06c28235 100644
--- a/drivers/gpu/drm/msm/msm_gpummu.c
+++ b/drivers/gpu/drm/msm/msm_gpummu.c
@@ -30,21 +30,19 @@ static int msm_gpummu_map(struct msm_mmu *mmu, uint64_t iova,
  {
struct msm_gpummu *gpummu = to_msm_gpummu(mmu);
unsigned idx = (iova - GPUMMU_VA_START) / GPUMMU_PAGE_SIZE;
-   struct scatterlist *sg;
+   struct sg_dma_page_iter dma_iter;
unsigned prot_bits = 0;
-   unsigned i, j;
  
  	if (prot & IOMMU_WRITE)

prot_bits |= 1;
if (prot & IOMMU_READ)
prot_bits |= 2;
  
-	for_each_sg(sgt->sgl, sg, sgt->nents, i) {

-   dma_addr_t addr = sg->dma_address;
-   for (j = 0; j < sg->length / GPUMMU_PAGE_SIZE; j++, idx++) {
-   gpummu->table[idx] = addr | prot_bits;
-   addr += GPUMMU_PAGE_SIZE;
-   }
+   for_each_sgtable_dma_page(sgt, &dma_iter, 0) {
+   dma_addr_t addr = sg_page_iter_dma_address(&dma_iter);
+
+   BUILD_BUG_ON(GPUMMU_PAGE_SIZE != PAGE_SIZE);
+   gpummu->table[idx++] = addr | prot_bits;


Given that the BUILD_BUG_ON might prevent valid arm64 configs from 
building, how about a simple tweak like:


	for (i = 0; i < PAGE_SIZE; i += GPUMMU_PAGE_SIZE)
		gpummu->table[idx++] = i + addr | prot_bits;
?

Or alternatively perhaps some more aggressive #ifdefs or makefile tweaks 
to prevent the GPUMMU code building for arm64 at all if it's only 
relevant to 32-bit platforms (which I believe might be the case).


Robin.
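
Purely as an illustration (not part of the thread), Robin's first
suggestion might slot into the new iterator loop in msm_gpummu_map()
roughly like this; the loop counter i is added for the sketch, and the
helper names are the ones already used in the patch:

	for_each_sgtable_dma_page(sgt, &dma_iter, 0) {
		dma_addr_t addr = sg_page_iter_dma_address(&dma_iter);
		unsigned int i;

		/* Split each PAGE_SIZE DMA page into GPUMMU_PAGE_SIZE entries. */
		for (i = 0; i < PAGE_SIZE; i += GPUMMU_PAGE_SIZE)
			gpummu->table[idx++] = (addr + i) | prot_bits;
	}

This keeps the table filled at GPUMMU page granularity even when the CPU
page size is larger, which is what the BUILD_BUG_ON was guarding against.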



Re: [PATCH v9 12/32] drm: msm: fix common struct sg_table related issues

2020-08-26 Thread kernel test robot
Hi Marek,

I love your patch! Yet something to improve:

[auto build test ERROR on linuxtv-media/master]
[also build test ERROR on drm-intel/for-linux-next linus/master v5.9-rc2 next-20200826]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Marek-Szyprowski/DRM-fix-struct-sg_table-nents-vs-orig_nents-misuse/20200826-143908
base:   git://linuxtv.org/media_tree.git master
config: arm64-randconfig-r002-20200826 (attached as .config)
compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project 7cfcecece0e0430937cf529ce74d3a071a4dedc6)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install arm64 cross compiling tool for clang build
        # apt-get install binutils-aarch64-linux-gnu
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=arm64

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

   aarch64-linux-gnu-ld: warning: -z norelro ignored
   aarch64-linux-gnu-ld: fs/orangefs/orangefs-debugfs.o: in function `orangefs_debug_read':
   fs/orangefs/orangefs-debugfs.c:375: undefined reference to `stpcpy'
   aarch64-linux-gnu-ld: security/apparmor/lsm.o: in function `param_get_mode':
   security/apparmor/lsm.c:1559: undefined reference to `stpcpy'
   aarch64-linux-gnu-ld: security/apparmor/lsm.o: in function `param_get_audit':
   security/apparmor/lsm.c:1530: undefined reference to `stpcpy'
   aarch64-linux-gnu-ld: crypto/async_tx/async_tx.o: in function `async_tx_channel_switch':
   crypto/async_tx/async_tx.c:118: undefined reference to `dma_wait_for_async_tx'
   aarch64-linux-gnu-ld: crypto/async_tx/async_tx.o: in function `async_tx_quiesce':
   crypto/async_tx/async_tx.c:270: undefined reference to `dma_wait_for_async_tx'
   aarch64-linux-gnu-ld: crypto/async_tx/async_tx.c:270: undefined reference to `dma_wait_for_async_tx'
   aarch64-linux-gnu-ld: crypto/async_tx/async_memcpy.o: in function `async_memcpy':
   crypto/async_tx/async_memcpy.c:43: undefined reference to `dmaengine_get_unmap_data'
   aarch64-linux-gnu-ld: crypto/async_tx/async_memcpy.c:89: undefined reference to `dmaengine_unmap_put'
   aarch64-linux-gnu-ld: crypto/async_tx/async_xor.o: in function `async_xor':
   crypto/async_tx/async_xor.c:172: undefined reference to `dmaengine_get_unmap_data'
   aarch64-linux-gnu-ld: crypto/async_tx/async_xor.c:199: undefined reference to `dmaengine_unmap_put'
   aarch64-linux-gnu-ld: crypto/async_tx/async_xor.c:199: undefined reference to `dmaengine_unmap_put'
   aarch64-linux-gnu-ld: crypto/async_tx/async_xor.c:196: undefined reference to `dmaengine_unmap_put'
   aarch64-linux-gnu-ld: crypto/async_tx/async_xor.o: in function `async_xor_val':
   crypto/async_tx/async_xor.c:268: undefined reference to `dmaengine_get_unmap_data'
   aarch64-linux-gnu-ld: crypto/async_tx/async_xor.c:324: undefined reference to `dmaengine_unmap_put'
   aarch64-linux-gnu-ld: crypto/async_tx/async_pq.o: in function `async_gen_syndrome':
   crypto/async_tx/async_pq.c:176: undefined reference to `dmaengine_get_unmap_data'
   aarch64-linux-gnu-ld: crypto/async_tx/async_pq.c:233: undefined reference to `dmaengine_unmap_put'
   aarch64-linux-gnu-ld: crypto/async_tx/async_pq.c:229: undefined reference to `dmaengine_unmap_put'
   aarch64-linux-gnu-ld: crypto/async_tx/async_pq.o: in function `async_syndrome_val':
   crypto/async_tx/async_pq.c:295: undefined reference to `dmaengine_get_unmap_data'
   aarch64-linux-gnu-ld: crypto/async_tx/async_pq.c:412: undefined reference to `dmaengine_unmap_put'
   aarch64-linux-gnu-ld: drivers/xen/sys-hypervisor.o: in function `buildid_show':
   drivers/xen/sys-hypervisor.c:375: undefined reference to `stpcpy'
   aarch64-linux-gnu-ld: drivers/tty/tty_io.o: in function `tty_line_name':
   drivers/tty/tty_io.c:1139: undefined reference to `stpcpy'
   aarch64-linux-gnu-ld: drivers/tty/tty_io.c:1139: undefined reference to `stpcpy'
   aarch64-linux-gnu-ld: drivers/tty/tty_io.c:1139: undefined reference to `stpcpy'
   aarch64-linux-gnu-ld: drivers/gpu/drm/vc4/vc4_dsi.o: in function `dsi_dma_workaround_write':
   drivers/gpu/drm/vc4/vc4_dsi.c:581: undefined reference to `dma_sync_wait'
   aarch64-linux-gnu-ld: drivers/gpu/drm/vc4/vc4_dsi.c:581: undefined reference to `dma_sync_wait'
   aarch64-linux-gnu-ld: drivers/gpu/drm/vc4/vc4_dsi.c:581: undefined reference to `dma_sync_wait'
   aarch64-linux-gnu-ld: drivers/gpu/drm/vc4/vc4_dsi.o: in function `vc4_dsi_host_transfer':
   drivers/gpu/drm/vc4/vc4_dsi.c:564: undefined reference to `dma_sync_wait'
   aarch64-linux-gnu-ld: 

[PATCH v9 12/32] drm: msm: fix common struct sg_table related issues

2020-08-26 Thread Marek Szyprowski
The Documentation/DMA-API-HOWTO.txt states that the dma_map_sg() function
returns the number of entries created in the DMA address space. However,
the subsequent calls to dma_sync_sg_for_{device,cpu}() and dma_unmap_sg()
must be made with the original number of entries passed to dma_map_sg().

struct sg_table is a common structure used for describing a non-contiguous
memory buffer, used widely in the DRM and graphics subsystems. It consists
of a scatterlist with memory pages and DMA addresses (the sgl entry), as
well as the numbers of scatterlist entries: CPU pages (the orig_nents
entry) and DMA-mapped pages (the nents entry).

It turned out that it was a common mistake to misuse the nents and
orig_nents entries, calling DMA-mapping functions with the wrong number of
entries or ignoring the number of mapped entries returned by the
dma_map_sg() function.

To avoid such issues, let's use the common dma-mapping wrappers operating
directly on struct sg_table objects and use scatterlist page iterators
where possible. This almost always hides references to the nents and
orig_nents entries, making the code robust, easier to follow and
copy/paste-safe.

Signed-off-by: Marek Szyprowski 
Acked-by: Rob Clark 
---
 drivers/gpu/drm/msm/msm_gem.c| 13 +
 drivers/gpu/drm/msm/msm_gpummu.c | 14 ++
 drivers/gpu/drm/msm/msm_iommu.c  |  2 +-
 3 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index b2f49152b4d4..8c7ae812b813 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -53,11 +53,10 @@ static void sync_for_device(struct msm_gem_object *msm_obj)
struct device *dev = msm_obj->base.dev->dev;
 
if (get_dma_ops(dev) && IS_ENABLED(CONFIG_ARM64)) {
-   dma_sync_sg_for_device(dev, msm_obj->sgt->sgl,
-   msm_obj->sgt->nents, DMA_BIDIRECTIONAL);
+   dma_sync_sgtable_for_device(dev, msm_obj->sgt,
+   DMA_BIDIRECTIONAL);
} else {
-   dma_map_sg(dev, msm_obj->sgt->sgl,
-   msm_obj->sgt->nents, DMA_BIDIRECTIONAL);
+   dma_map_sgtable(dev, msm_obj->sgt, DMA_BIDIRECTIONAL, 0);
}
 }
 
@@ -66,11 +65,9 @@ static void sync_for_cpu(struct msm_gem_object *msm_obj)
struct device *dev = msm_obj->base.dev->dev;
 
if (get_dma_ops(dev) && IS_ENABLED(CONFIG_ARM64)) {
-   dma_sync_sg_for_cpu(dev, msm_obj->sgt->sgl,
-   msm_obj->sgt->nents, DMA_BIDIRECTIONAL);
+   dma_sync_sgtable_for_cpu(dev, msm_obj->sgt, DMA_BIDIRECTIONAL);
} else {
-   dma_unmap_sg(dev, msm_obj->sgt->sgl,
-   msm_obj->sgt->nents, DMA_BIDIRECTIONAL);
+   dma_unmap_sgtable(dev, msm_obj->sgt, DMA_BIDIRECTIONAL, 0);
}
 }
 
diff --git a/drivers/gpu/drm/msm/msm_gpummu.c b/drivers/gpu/drm/msm/msm_gpummu.c
index 310a31b05faa..319f06c28235 100644
--- a/drivers/gpu/drm/msm/msm_gpummu.c
+++ b/drivers/gpu/drm/msm/msm_gpummu.c
@@ -30,21 +30,19 @@ static int msm_gpummu_map(struct msm_mmu *mmu, uint64_t iova,
 {
struct msm_gpummu *gpummu = to_msm_gpummu(mmu);
unsigned idx = (iova - GPUMMU_VA_START) / GPUMMU_PAGE_SIZE;
-   struct scatterlist *sg;
+   struct sg_dma_page_iter dma_iter;
unsigned prot_bits = 0;
-   unsigned i, j;
 
if (prot & IOMMU_WRITE)
prot_bits |= 1;
if (prot & IOMMU_READ)
prot_bits |= 2;
 
-   for_each_sg(sgt->sgl, sg, sgt->nents, i) {
-   dma_addr_t addr = sg->dma_address;
-   for (j = 0; j < sg->length / GPUMMU_PAGE_SIZE; j++, idx++) {
-   gpummu->table[idx] = addr | prot_bits;
-   addr += GPUMMU_PAGE_SIZE;
-   }
+   for_each_sgtable_dma_page(sgt, &dma_iter, 0) {
+   dma_addr_t addr = sg_page_iter_dma_address(&dma_iter);
+
+   BUILD_BUG_ON(GPUMMU_PAGE_SIZE != PAGE_SIZE);
+   gpummu->table[idx++] = addr | prot_bits;
}
 
/* we can improve by deferring flush for multiple map() */
diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
index 3a381a9674c9..6c31e65834c6 100644
--- a/drivers/gpu/drm/msm/msm_iommu.c
+++ b/drivers/gpu/drm/msm/msm_iommu.c
@@ -36,7 +36,7 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint64_t iova,
struct msm_iommu *iommu = to_msm_iommu(mmu);
size_t ret;
 
-   ret = iommu_map_sg(iommu->domain, iova, sgt->sgl, sgt->nents, prot);
+   ret = iommu_map_sgtable(iommu->domain, iova, sgt, prot);
WARN_ON(!ret);
 
return (ret == len) ? 0 : -EINVAL;
-- 
2.17.1
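
A side note on the msm_iommu.c hunk: iommu_map_sgtable() is a thin
convenience helper over iommu_map_sg() that supplies the entry count from
the sg_table itself. A rough sketch of what it amounts to (approximate,
based on the generic sgtable helper added earlier in this series, not a
verbatim copy of the header):

	static inline size_t iommu_map_sgtable(struct iommu_domain *domain,
					       unsigned long iova,
					       struct sg_table *sgt, int prot)
	{
		/* IOMMU mapping walks the CPU pages, so orig_nents is the right count. */
		return iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, prot);
	}

That is exactly the kind of nents/orig_nents detail the wrappers are meant
to keep out of driver code.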

___
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel