AMDGPU_PL_MMIO_REMAP (the HDP flush page) exposes a hardware MMIO register window through a PCI BAR; it is not normal RAM and has no struct pages backing it. When such a BO is shared with another device through dma-buf, the importer still expects an sg_table, i.e. a list of DMA-able chunks it can program its engine with. Since there are no pages to build that list from, describe the BAR window as what it really is: a single, contiguous byte range on the PCI bus (start DMA address plus length). Build a one-entry sg_table that points at the BAR's physical bus address, map it with dma_map_resource(), and mark the entry as page-less with sg_set_page(..., NULL, ...). The importer then performs its DMA reads and writes directly against that I/O span.
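Roughly, with names and error handling simplified for illustration only (map_bar_span() is a hypothetical stand-in; the real helpers added below live in amdgpu_ttm.c), the export side amounts to:

	/* Sketch: map a page-less BAR span for an importing device. */
	static int map_bar_span(struct device *dev, phys_addr_t phys, size_t size,
				enum dma_data_direction dir, struct sg_table *sgt)
	{
		dma_addr_t addr;
		int r;

		r = sg_alloc_table(sgt, 1, GFP_KERNEL);	/* one entry is enough */
		if (r)
			return r;
		sg_set_page(sgt->sgl, NULL, size, 0);	/* I/O space, no struct page */
		addr = dma_map_resource(dev, phys, size, dir, DMA_ATTR_SKIP_CPU_SYNC);
		if (dma_mapping_error(dev, addr)) {
			sg_free_table(sgt);
			return -EIO;
		}
		sg_dma_address(sgt->sgl) = addr;
		sg_dma_len(sgt->sgl) = size;
		return 0;
	}

Teardown is the mirror image: dma_unmap_resource() on the stored DMA address/length, then sg_free_table().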
This patch centralizes the BAR-I/O mapping in TTM and wires dma-buf up to it:

- Add amdgpu_ttm_mmio_remap_alloc_sgt() / amdgpu_ttm_mmio_remap_free_sgt() in
  amdgpu_ttm.c as the single place for BAR-I/O handling. They walk the TTM
  resource via amdgpu_res_cursor, add the byte offset to
  adev->rmmio_remap.bus_addr, build a minimal one-entry sg_table
  (sg_alloc_table(*sgt, 1, GFP_KERNEL)) with no struct pages behind it
  (sg_set_page(sg, NULL, cur.size, 0)), and map/unmap it with
  dma_map_resource().

- Hook the helpers into dma-buf: amdgpu_dma_buf_map()/unmap() in
  amdgpu_dma_buf.c call them when the BO is placed in AMDGPU_PL_MMIO_REMAP.

Suggested-by: Christian König <[email protected]>
Cc: Alex Deucher <[email protected]>
Signed-off-by: Srinivasan Shanmugam <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 18 +++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c     | 80 +++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h     |  9 +++
 3 files changed, 107 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index ff98c87b2e0b..2fbd6d458a6f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -37,6 +37,7 @@
 #include "amdgpu_dma_buf.h"
 #include "amdgpu_xgmi.h"
 #include "amdgpu_vm.h"
+#include "amdgpu_ttm.h"
 #include <drm/amdgpu_drm.h>
 #include <drm/ttm/ttm_tt.h>
 #include <linux/dma-buf.h>
@@ -210,6 +211,14 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
 		if (r)
 			return ERR_PTR(r);
 		break;
+
+	case AMDGPU_PL_MMIO_REMAP:
+		r = amdgpu_ttm_mmio_remap_alloc_sgt(adev, bo->tbo.resource,
+						    attach->dev, dir, &sgt);
+		if (r)
+			return ERR_PTR(r);
+		break;
+
 	default:
 		return ERR_PTR(-EINVAL);
 	}
@@ -235,6 +244,15 @@ static void amdgpu_dma_buf_unmap(struct dma_buf_attachment *attach,
 				 struct sg_table *sgt,
 				 enum dma_data_direction dir)
 {
+	struct drm_gem_object *obj = attach->dmabuf->priv;
+	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+	if (bo->tbo.resource &&
+	    bo->tbo.resource->mem_type == AMDGPU_PL_MMIO_REMAP) {
+		amdgpu_ttm_mmio_remap_free_sgt(attach->dev, dir, sgt);
+		return;
+	}
+
 	if (sg_page(sgt->sgl)) {
 		dma_unmap_sgtable(attach->dev, sgt, dir, 0);
 		sg_free_table(sgt);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index d4c93c78b80a..17ea079bd96f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1102,6 +1102,86 @@ static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev,
 	kfree(gtt);
 }
 
+/**
+ * amdgpu_ttm_mmio_remap_alloc_sgt - build an sg_table for the MMIO_REMAP I/O aperture
+ * @adev: amdgpu device providing the remap BAR base (adev->rmmio_remap.bus_addr)
+ * @res: TTM resource of the BO to export; expected to live in AMDGPU_PL_MMIO_REMAP
+ * @dev: importing device to map for (typically @attach->dev in dma-buf paths)
+ * @dir: DMA data direction for the importer (passed to dma_map_resource())
+ * @sgt: output; on success, set to a newly allocated sg_table describing the I/O span
+ *
+ * The HDP flush page (AMDGPU_PL_MMIO_REMAP) is a fixed hardware I/O window in a PCI
+ * BAR; there are no struct pages to back it. Importers still need a DMA address list,
+ * so we synthesize a minimal sg_table and populate it from dma_map_resource(), not
+ * from pages. Using the common amdgpu_res_cursor walker keeps the offset/size math
+ * consistent with other TTM/manager users.
+ *
+ * - @res is assumed to be a small, contiguous I/O region (typically a single 4 KiB
+ *   page) in AMDGPU_PL_MMIO_REMAP. Callers should validate placement before calling.
+ * - The sg entry is created with sg_set_page(sg, NULL, ...) to reflect I/O space.
+ * - The mapping uses DMA_ATTR_SKIP_CPU_SYNC because this is MMIO, not cacheable RAM.
+ * - Peer reachability / p2pdma policy checks must be done by the caller.
+ *
+ * Return:
+ * * 0 on success, with *@sgt set to a valid table that must be freed via
+ *   amdgpu_ttm_mmio_remap_free_sgt().
+ * * -ENOMEM if allocation of the sg_table fails.
+ * * -EIO if dma_map_resource() fails.
+ *
+ */
+int amdgpu_ttm_mmio_remap_alloc_sgt(struct amdgpu_device *adev,
+				    struct ttm_resource *res,
+				    struct device *dev,
+				    enum dma_data_direction dir,
+				    struct sg_table **sgt)
+{
+	struct amdgpu_res_cursor cur;
+	dma_addr_t dma;
+	resource_size_t phys;
+	struct scatterlist *sg;
+	int r;
+
+	/* Walk the resource once; MMIO_REMAP is expected to be contiguous+small. */
+	amdgpu_res_first(res, 0, res->size, &cur);
+
+	/* Translate byte offset in the remap window into a host physical BAR address. */
+	phys = adev->rmmio_remap.bus_addr + cur.start;
+
+	/* Build a single-entry sg_table mapped as I/O (no struct page backing). */
+	*sgt = kzalloc(sizeof(**sgt), GFP_KERNEL);
+	if (!*sgt)
+		return -ENOMEM;
+	r = sg_alloc_table(*sgt, 1, GFP_KERNEL);
+	if (r) {
+		kfree(*sgt);
+		return r;
+	}
+	sg = (*sgt)->sgl;
+	sg_set_page(sg, NULL, cur.size, 0); /* WHY: I/O space -> no pages */
+
+	dma = dma_map_resource(dev, phys, cur.size, dir, DMA_ATTR_SKIP_CPU_SYNC);
+	if (dma_mapping_error(dev, dma)) {
+		sg_free_table(*sgt);
+		kfree(*sgt);
+		return -EIO;
+	}
+	sg_dma_address(sg) = dma;
+	sg_dma_len(sg) = cur.size;
+	return 0;
+}
+
+void amdgpu_ttm_mmio_remap_free_sgt(struct device *dev,
+				    enum dma_data_direction dir,
+				    struct sg_table *sgt)
+{
+	struct scatterlist *sg = sgt->sgl;
+
+	dma_unmap_resource(dev, sg_dma_address(sg), sg_dma_len(sg),
+			   dir, DMA_ATTR_SKIP_CPU_SYNC);
+	sg_free_table(sgt);
+	kfree(sgt);
+}
+
 /**
  * amdgpu_ttm_tt_create - Create a ttm_tt object for a given BO
  *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index a8379b925878..116f3bb1d64b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -233,4 +233,13 @@ int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type);
 
 void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
 
+int amdgpu_ttm_mmio_remap_alloc_sgt(struct amdgpu_device *adev,
+				    struct ttm_resource *res,
+				    struct device *dev,
+				    enum dma_data_direction dir,
+				    struct sg_table **sgt);
+void amdgpu_ttm_mmio_remap_free_sgt(struct device *dev,
+				    enum dma_data_direction dir,
+				    struct sg_table *sgt);
+
 #endif
-- 
2.34.1
