Re: [PATCH v9 12/32] drm: msm: fix common struct sg_table related issues
On Tue, Sep 1, 2020 at 12:14 PM Robin Murphy wrote: > > On 2020-08-26 07:32, Marek Szyprowski wrote: > > The Documentation/DMA-API-HOWTO.txt states that the dma_map_sg() function > > returns the number of the created entries in the DMA address space. > > However, the subsequent calls to the dma_sync_sg_for_{device,cpu}() and > > dma_unmap_sg() must be called with the original number of the entries > > passed to the dma_map_sg(). > > > > struct sg_table is a common structure used for describing a non-contiguous > > memory buffer, used commonly in the DRM and graphics subsystems. It > > consists of a scatterlist with memory pages and DMA addresses (sgl entry), > > as well as the number of scatterlist entries: CPU pages (orig_nents entry) > > and DMA mapped pages (nents entry). > > > > It turned out that it was a common mistake to misuse nents and orig_nents > > entries, calling DMA-mapping functions with a wrong number of entries or > > ignoring the number of mapped entries returned by the dma_map_sg() > > function. > > > > To avoid such issues, let's use the common dma-mapping wrappers operating > > directly on the struct sg_table objects and use scatterlist page > > iterators where possible. This, almost always, hides references to the > > nents and orig_nents entries, making the code robust, easier to follow > > and copy/paste safe. 
> > > > Signed-off-by: Marek Szyprowski > > Acked-by: Rob Clark > > --- > > drivers/gpu/drm/msm/msm_gem.c| 13 + > > drivers/gpu/drm/msm/msm_gpummu.c | 14 ++ > > drivers/gpu/drm/msm/msm_iommu.c | 2 +- > > 3 files changed, 12 insertions(+), 17 deletions(-) > > > > diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c > > index b2f49152b4d4..8c7ae812b813 100644 > > --- a/drivers/gpu/drm/msm/msm_gem.c > > +++ b/drivers/gpu/drm/msm/msm_gem.c > > @@ -53,11 +53,10 @@ static void sync_for_device(struct msm_gem_object > > *msm_obj) > > struct device *dev = msm_obj->base.dev->dev; > > > > if (get_dma_ops(dev) && IS_ENABLED(CONFIG_ARM64)) { > > - dma_sync_sg_for_device(dev, msm_obj->sgt->sgl, > > - msm_obj->sgt->nents, DMA_BIDIRECTIONAL); > > + dma_sync_sgtable_for_device(dev, msm_obj->sgt, > > + DMA_BIDIRECTIONAL); > > } else { > > - dma_map_sg(dev, msm_obj->sgt->sgl, > > - msm_obj->sgt->nents, DMA_BIDIRECTIONAL); > > + dma_map_sgtable(dev, msm_obj->sgt, DMA_BIDIRECTIONAL, 0); > > } > > } > > > > @@ -66,11 +65,9 @@ static void sync_for_cpu(struct msm_gem_object *msm_obj) > > struct device *dev = msm_obj->base.dev->dev; > > > > if (get_dma_ops(dev) && IS_ENABLED(CONFIG_ARM64)) { > > - dma_sync_sg_for_cpu(dev, msm_obj->sgt->sgl, > > - msm_obj->sgt->nents, DMA_BIDIRECTIONAL); > > + dma_sync_sgtable_for_cpu(dev, msm_obj->sgt, > > DMA_BIDIRECTIONAL); > > } else { > > - dma_unmap_sg(dev, msm_obj->sgt->sgl, > > - msm_obj->sgt->nents, DMA_BIDIRECTIONAL); > > + dma_unmap_sgtable(dev, msm_obj->sgt, DMA_BIDIRECTIONAL, 0); > > } > > } > > > > diff --git a/drivers/gpu/drm/msm/msm_gpummu.c > > b/drivers/gpu/drm/msm/msm_gpummu.c > > index 310a31b05faa..319f06c28235 100644 > > --- a/drivers/gpu/drm/msm/msm_gpummu.c > > +++ b/drivers/gpu/drm/msm/msm_gpummu.c > > @@ -30,21 +30,19 @@ static int msm_gpummu_map(struct msm_mmu *mmu, uint64_t > > iova, > > { > > struct msm_gpummu *gpummu = to_msm_gpummu(mmu); > > unsigned idx = (iova - GPUMMU_VA_START) / GPUMMU_PAGE_SIZE; > > 
- struct scatterlist *sg; > > + struct sg_dma_page_iter dma_iter; > > unsigned prot_bits = 0; > > - unsigned i, j; > > > > if (prot & IOMMU_WRITE) > > prot_bits |= 1; > > if (prot & IOMMU_READ) > > prot_bits |= 2; > > > > - for_each_sg(sgt->sgl, sg, sgt->nents, i) { > > - dma_addr_t addr = sg->dma_address; > > - for (j = 0; j < sg->length / GPUMMU_PAGE_SIZE; j++, idx++) { > > - gpummu->table[idx] = addr | prot_bits; > > - addr += GPUMMU_PAGE_SIZE; > > - } > > + for_each_sgtable_dma_page(sgt, &dma_iter, 0) { > > + dma_addr_t addr = sg_page_iter_dma_address(&dma_iter); > > + > > + BUILD_BUG_ON(GPUMMU_PAGE_SIZE != PAGE_SIZE); > > + gpummu->table[idx++] = addr | prot_bits; > > Given that the BUILD_BUG_ON might prevent valid arm64 configs from > building, how about a simple tweak like: > > for (i = 0; i < PAGE_SIZE; i += GPUMMU_PAGE_SIZE) > gpummu->table[idx++] = i + addr | prot_bits; > ? > > Or alternatively perhaps some more aggressive #ifdefs or makefile tweaks > to prevent the GPUMMU code building for arm64 at all if it's only > relevant to 32-bit platforms (which I believe might be the case).
Re: [PATCH v9 12/32] drm: msm: fix common struct sg_table related issues
On 2020-08-26 07:32, Marek Szyprowski wrote: The Documentation/DMA-API-HOWTO.txt states that the dma_map_sg() function returns the number of the created entries in the DMA address space. However, the subsequent calls to the dma_sync_sg_for_{device,cpu}() and dma_unmap_sg() must be called with the original number of the entries passed to the dma_map_sg(). struct sg_table is a common structure used for describing a non-contiguous memory buffer, used commonly in the DRM and graphics subsystems. It consists of a scatterlist with memory pages and DMA addresses (sgl entry), as well as the number of scatterlist entries: CPU pages (orig_nents entry) and DMA mapped pages (nents entry). It turned out that it was a common mistake to misuse nents and orig_nents entries, calling DMA-mapping functions with a wrong number of entries or ignoring the number of mapped entries returned by the dma_map_sg() function. To avoid such issues, let's use the common dma-mapping wrappers operating directly on the struct sg_table objects and use scatterlist page iterators where possible. This, almost always, hides references to the nents and orig_nents entries, making the code robust, easier to follow and copy/paste safe. 
Signed-off-by: Marek Szyprowski Acked-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem.c| 13 + drivers/gpu/drm/msm/msm_gpummu.c | 14 ++ drivers/gpu/drm/msm/msm_iommu.c | 2 +- 3 files changed, 12 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index b2f49152b4d4..8c7ae812b813 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -53,11 +53,10 @@ static void sync_for_device(struct msm_gem_object *msm_obj) struct device *dev = msm_obj->base.dev->dev; if (get_dma_ops(dev) && IS_ENABLED(CONFIG_ARM64)) { - dma_sync_sg_for_device(dev, msm_obj->sgt->sgl, - msm_obj->sgt->nents, DMA_BIDIRECTIONAL); + dma_sync_sgtable_for_device(dev, msm_obj->sgt, + DMA_BIDIRECTIONAL); } else { - dma_map_sg(dev, msm_obj->sgt->sgl, - msm_obj->sgt->nents, DMA_BIDIRECTIONAL); + dma_map_sgtable(dev, msm_obj->sgt, DMA_BIDIRECTIONAL, 0); } } @@ -66,11 +65,9 @@ static void sync_for_cpu(struct msm_gem_object *msm_obj) struct device *dev = msm_obj->base.dev->dev; if (get_dma_ops(dev) && IS_ENABLED(CONFIG_ARM64)) { - dma_sync_sg_for_cpu(dev, msm_obj->sgt->sgl, - msm_obj->sgt->nents, DMA_BIDIRECTIONAL); + dma_sync_sgtable_for_cpu(dev, msm_obj->sgt, DMA_BIDIRECTIONAL); } else { - dma_unmap_sg(dev, msm_obj->sgt->sgl, - msm_obj->sgt->nents, DMA_BIDIRECTIONAL); + dma_unmap_sgtable(dev, msm_obj->sgt, DMA_BIDIRECTIONAL, 0); } } diff --git a/drivers/gpu/drm/msm/msm_gpummu.c b/drivers/gpu/drm/msm/msm_gpummu.c index 310a31b05faa..319f06c28235 100644 --- a/drivers/gpu/drm/msm/msm_gpummu.c +++ b/drivers/gpu/drm/msm/msm_gpummu.c @@ -30,21 +30,19 @@ static int msm_gpummu_map(struct msm_mmu *mmu, uint64_t iova, { struct msm_gpummu *gpummu = to_msm_gpummu(mmu); unsigned idx = (iova - GPUMMU_VA_START) / GPUMMU_PAGE_SIZE; - struct scatterlist *sg; + struct sg_dma_page_iter dma_iter; unsigned prot_bits = 0; - unsigned i, j; if (prot & IOMMU_WRITE) prot_bits |= 1; if (prot & IOMMU_READ) prot_bits |= 2; - for_each_sg(sgt->sgl, sg, 
sgt->nents, i) { - dma_addr_t addr = sg->dma_address; - for (j = 0; j < sg->length / GPUMMU_PAGE_SIZE; j++, idx++) { - gpummu->table[idx] = addr | prot_bits; - addr += GPUMMU_PAGE_SIZE; - } + for_each_sgtable_dma_page(sgt, &dma_iter, 0) { + dma_addr_t addr = sg_page_iter_dma_address(&dma_iter); + + BUILD_BUG_ON(GPUMMU_PAGE_SIZE != PAGE_SIZE); + gpummu->table[idx++] = addr | prot_bits; Given that the BUILD_BUG_ON might prevent valid arm64 configs from building, how about a simple tweak like: for (i = 0; i < PAGE_SIZE; i += GPUMMU_PAGE_SIZE) gpummu->table[idx++] = i + addr | prot_bits; ? Or alternatively perhaps some more aggressive #ifdefs or makefile tweaks to prevent the GPUMMU code building for arm64 at all if it's only relevant to 32-bit platforms (which I believe might be the case). Robin. } /* we can improve by deferring flush for multiple map() */ diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index 3a381a9674c9..6c31e65834c6 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -36,7 +36,7 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint64_t iova,
Re: [PATCH v9 12/32] drm: msm: fix common struct sg_table related issues
Hi Marek, I love your patch! Yet something to improve: [auto build test ERROR on linuxtv-media/master] [also build test ERROR on drm-intel/for-linux-next linus/master v5.9-rc2 next-20200826] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Marek-Szyprowski/DRM-fix-struct-sg_table-nents-vs-orig_nents-misuse/20200826-143908 base: git://linuxtv.org/media_tree.git master config: arm64-randconfig-r002-20200826 (attached as .config) compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project 7cfcecece0e0430937cf529ce74d3a071a4dedc6) reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # install arm64 cross compiling tool for clang build # apt-get install binutils-aarch64-linux-gnu # save the attached .config to linux build tree COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=arm64 If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot All errors (new ones prefixed by >>): aarch64-linux-gnu-ld: warning: -z norelro ignored aarch64-linux-gnu-ld: fs/orangefs/orangefs-debugfs.o: in function `orangefs_debug_read': fs/orangefs/orangefs-debugfs.c:375: undefined reference to `stpcpy' aarch64-linux-gnu-ld: security/apparmor/lsm.o: in function `param_get_mode': security/apparmor/lsm.c:1559: undefined reference to `stpcpy' aarch64-linux-gnu-ld: security/apparmor/lsm.o: in function `param_get_audit': security/apparmor/lsm.c:1530: undefined reference to `stpcpy' aarch64-linux-gnu-ld: crypto/async_tx/async_tx.o: in function `async_tx_channel_switch': crypto/async_tx/async_tx.c:118: undefined reference to `dma_wait_for_async_tx' aarch64-linux-gnu-ld: crypto/async_tx/async_tx.o: in function `async_tx_quiesce': crypto/async_tx/async_tx.c:270: 
undefined reference to `dma_wait_for_async_tx' aarch64-linux-gnu-ld: crypto/async_tx/async_tx.c:270: undefined reference to `dma_wait_for_async_tx' aarch64-linux-gnu-ld: crypto/async_tx/async_memcpy.o: in function `async_memcpy': crypto/async_tx/async_memcpy.c:43: undefined reference to `dmaengine_get_unmap_data' aarch64-linux-gnu-ld: crypto/async_tx/async_memcpy.c:89: undefined reference to `dmaengine_unmap_put' aarch64-linux-gnu-ld: crypto/async_tx/async_xor.o: in function `async_xor': crypto/async_tx/async_xor.c:172: undefined reference to `dmaengine_get_unmap_data' aarch64-linux-gnu-ld: crypto/async_tx/async_xor.c:199: undefined reference to `dmaengine_unmap_put' aarch64-linux-gnu-ld: crypto/async_tx/async_xor.c:199: undefined reference to `dmaengine_unmap_put' aarch64-linux-gnu-ld: crypto/async_tx/async_xor.c:196: undefined reference to `dmaengine_unmap_put' aarch64-linux-gnu-ld: crypto/async_tx/async_xor.o: in function `async_xor_val': crypto/async_tx/async_xor.c:268: undefined reference to `dmaengine_get_unmap_data' aarch64-linux-gnu-ld: crypto/async_tx/async_xor.c:324: undefined reference to `dmaengine_unmap_put' aarch64-linux-gnu-ld: crypto/async_tx/async_pq.o: in function `async_gen_syndrome': crypto/async_tx/async_pq.c:176: undefined reference to `dmaengine_get_unmap_data' aarch64-linux-gnu-ld: crypto/async_tx/async_pq.c:233: undefined reference to `dmaengine_unmap_put' aarch64-linux-gnu-ld: crypto/async_tx/async_pq.c:229: undefined reference to `dmaengine_unmap_put' aarch64-linux-gnu-ld: crypto/async_tx/async_pq.o: in function `async_syndrome_val': crypto/async_tx/async_pq.c:295: undefined reference to `dmaengine_get_unmap_data' aarch64-linux-gnu-ld: crypto/async_tx/async_pq.c:412: undefined reference to `dmaengine_unmap_put' aarch64-linux-gnu-ld: drivers/xen/sys-hypervisor.o: in function `buildid_show': drivers/xen/sys-hypervisor.c:375: undefined reference to `stpcpy' aarch64-linux-gnu-ld: drivers/tty/tty_io.o: in function `tty_line_name': 
drivers/tty/tty_io.c:1139: undefined reference to `stpcpy' aarch64-linux-gnu-ld: drivers/tty/tty_io.c:1139: undefined reference to `stpcpy' aarch64-linux-gnu-ld: drivers/tty/tty_io.c:1139: undefined reference to `stpcpy' aarch64-linux-gnu-ld: drivers/gpu/drm/vc4/vc4_dsi.o: in function `dsi_dma_workaround_write': drivers/gpu/drm/vc4/vc4_dsi.c:581: undefined reference to `dma_sync_wait' aarch64-linux-gnu-ld: drivers/gpu/drm/vc4/vc4_dsi.c:581: undefined reference to `dma_sync_wait' aarch64-linux-gnu-ld: drivers/gpu/drm/vc4/vc4_dsi.c:581: undefined reference to `dma_sync_wait' aarch64-linux-gnu-ld: drivers/gpu/drm/vc4/vc4_dsi.o: in function `vc4_dsi_host_transfer': drivers/gpu/drm/vc4/vc4_dsi.c:564: undefined reference to `dma_sync_wait' aarch64-linux-gnu-ld:
[PATCH v9 12/32] drm: msm: fix common struct sg_table related issues
The Documentation/DMA-API-HOWTO.txt states that the dma_map_sg() function returns the number of the created entries in the DMA address space. However, the subsequent calls to the dma_sync_sg_for_{device,cpu}() and dma_unmap_sg() must be called with the original number of the entries passed to the dma_map_sg(). struct sg_table is a common structure used for describing a non-contiguous memory buffer, used commonly in the DRM and graphics subsystems. It consists of a scatterlist with memory pages and DMA addresses (sgl entry), as well as the number of scatterlist entries: CPU pages (orig_nents entry) and DMA mapped pages (nents entry). It turned out that it was a common mistake to misuse nents and orig_nents entries, calling DMA-mapping functions with a wrong number of entries or ignoring the number of mapped entries returned by the dma_map_sg() function. To avoid such issues, let's use the common dma-mapping wrappers operating directly on the struct sg_table objects and use scatterlist page iterators where possible. This, almost always, hides references to the nents and orig_nents entries, making the code robust, easier to follow and copy/paste safe. 
Signed-off-by: Marek Szyprowski Acked-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem.c| 13 + drivers/gpu/drm/msm/msm_gpummu.c | 14 ++ drivers/gpu/drm/msm/msm_iommu.c | 2 +- 3 files changed, 12 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index b2f49152b4d4..8c7ae812b813 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -53,11 +53,10 @@ static void sync_for_device(struct msm_gem_object *msm_obj) struct device *dev = msm_obj->base.dev->dev; if (get_dma_ops(dev) && IS_ENABLED(CONFIG_ARM64)) { - dma_sync_sg_for_device(dev, msm_obj->sgt->sgl, - msm_obj->sgt->nents, DMA_BIDIRECTIONAL); + dma_sync_sgtable_for_device(dev, msm_obj->sgt, + DMA_BIDIRECTIONAL); } else { - dma_map_sg(dev, msm_obj->sgt->sgl, - msm_obj->sgt->nents, DMA_BIDIRECTIONAL); + dma_map_sgtable(dev, msm_obj->sgt, DMA_BIDIRECTIONAL, 0); } } @@ -66,11 +65,9 @@ static void sync_for_cpu(struct msm_gem_object *msm_obj) struct device *dev = msm_obj->base.dev->dev; if (get_dma_ops(dev) && IS_ENABLED(CONFIG_ARM64)) { - dma_sync_sg_for_cpu(dev, msm_obj->sgt->sgl, - msm_obj->sgt->nents, DMA_BIDIRECTIONAL); + dma_sync_sgtable_for_cpu(dev, msm_obj->sgt, DMA_BIDIRECTIONAL); } else { - dma_unmap_sg(dev, msm_obj->sgt->sgl, - msm_obj->sgt->nents, DMA_BIDIRECTIONAL); + dma_unmap_sgtable(dev, msm_obj->sgt, DMA_BIDIRECTIONAL, 0); } } diff --git a/drivers/gpu/drm/msm/msm_gpummu.c b/drivers/gpu/drm/msm/msm_gpummu.c index 310a31b05faa..319f06c28235 100644 --- a/drivers/gpu/drm/msm/msm_gpummu.c +++ b/drivers/gpu/drm/msm/msm_gpummu.c @@ -30,21 +30,19 @@ static int msm_gpummu_map(struct msm_mmu *mmu, uint64_t iova, { struct msm_gpummu *gpummu = to_msm_gpummu(mmu); unsigned idx = (iova - GPUMMU_VA_START) / GPUMMU_PAGE_SIZE; - struct scatterlist *sg; + struct sg_dma_page_iter dma_iter; unsigned prot_bits = 0; - unsigned i, j; if (prot & IOMMU_WRITE) prot_bits |= 1; if (prot & IOMMU_READ) prot_bits |= 2; - for_each_sg(sgt->sgl, sg, 
sgt->nents, i) { - dma_addr_t addr = sg->dma_address; - for (j = 0; j < sg->length / GPUMMU_PAGE_SIZE; j++, idx++) { - gpummu->table[idx] = addr | prot_bits; - addr += GPUMMU_PAGE_SIZE; - } + for_each_sgtable_dma_page(sgt, &dma_iter, 0) { + dma_addr_t addr = sg_page_iter_dma_address(&dma_iter); + + BUILD_BUG_ON(GPUMMU_PAGE_SIZE != PAGE_SIZE); + gpummu->table[idx++] = addr | prot_bits; } /* we can improve by deferring flush for multiple map() */ diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index 3a381a9674c9..6c31e65834c6 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -36,7 +36,7 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint64_t iova, struct msm_iommu *iommu = to_msm_iommu(mmu); size_t ret; - ret = iommu_map_sg(iommu->domain, iova, sgt->sgl, sgt->nents, prot); + ret = iommu_map_sgtable(iommu->domain, iova, sgt, prot); WARN_ON(!ret); return (ret == len) ? 0 : -EINVAL; -- 2.17.1 ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel