To improve memory access performance on non-Large BAR devices, use SDMA
copies instead of MM access.

SDMA access is restricted to PAGE_SIZE accesses to account for the ptraced
process memory read/write use case.  Any other access size will use MMIO.

Failure to do an SDMA copy will result in a fallback to MM access.

Note: This is an attempt to re-address the patch request
'drm/amdgpu: extend ttm memory access to do sdma copies'
with the addition of restrictions and fallbacks.

Signed-off-by: Jonathan Kim <jonathan....@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 97 +++++++++++++++++++++++++
 1 file changed, 97 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 23fc57506a20..1cb984252f58 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1741,6 +1741,91 @@ static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
        }
 }
 
+/**
+ * amdgpu_ttm_access_memory_page_sdma - Read/write page of memory that backs a buffer object.
+ *
+ * @bo:  The buffer object to read/write
+ * @offset:  Offset into buffer object
+ * @buf:  Secondary buffer to write/read from
+ * @write:  true if writing
+ *
+ * This is used to access a page of VRAM that backs a buffer object via SDMA
+ * access for debugging purposes.
+ */
+static int amdgpu_ttm_access_memory_page_sdma(struct ttm_buffer_object *bo,
+                                       unsigned long offset, void *buf,
+                                       int write)
+{
+       struct amdgpu_bo *dst_bo, *abo = ttm_to_amdgpu_bo(bo);
+       struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+       struct ttm_operation_ctx ctx = {.interruptible = true};
+       struct amdgpu_copy_mem src, dst;
+       struct drm_gem_object *gobj;
+       struct dma_fence *fence;
+       struct page *dst_page;
+       struct ttm_tt *dst_ttm;
+       int ret;
+
+       /* Create an SG BO to dma map the target buffer for direct copy. */
+       ret = amdgpu_gem_object_create(adev, PAGE_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_CPU,
+                               0, ttm_bo_type_sg, NULL, &gobj);
+       if (ret)
+               return ret;
+
+       dst_bo = gem_to_amdgpu_bo(gobj);
+       dst_ttm = dst_bo->tbo.ttm;
+       dst_ttm->sg = kmalloc(sizeof(*dst_ttm->sg), GFP_KERNEL);
+       if (unlikely(!dst_ttm->sg)) {
+               ret = -ENOMEM;
+               goto free_bo;
+       }
+
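+       /* Point the SG table at the single page backing the caller's buffer. */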
+       dst_page = virt_to_page(buf);
+       ret = sg_alloc_table_from_pages(dst_ttm->sg, &dst_page, 1, 0,
+                                       1 << PAGE_SHIFT, GFP_KERNEL);
+       if (unlikely(ret))
+               goto free_sg;
+
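+       /* DMA map the destination page and record its device address. */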
+       ret = dma_map_sgtable(adev->dev, dst_ttm->sg, DMA_BIDIRECTIONAL, 0);
+       if (unlikely(ret))
+               goto release_sg;
+
+       drm_prime_sg_to_dma_addr_array(dst_ttm->sg, dst_ttm->dma_address, 1);
+
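+       /* Validate the SG BO into GTT so the SDMA engine can address it. */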
+       amdgpu_bo_placement_from_domain(dst_bo, AMDGPU_GEM_DOMAIN_GTT);
+       ret = ttm_bo_validate(&dst_bo->tbo, &dst_bo->placement, &ctx);
+       if (ret)
+               goto unmap_sg;
+
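+       /* Describe both sides of the copy: the target BO page and the SG BO. */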
+       src.bo = bo;
+       src.mem = bo->resource;
+       src.offset = offset;
+       dst.bo = &dst_bo->tbo;
+       dst.mem = dst.bo->resource;
+       dst.offset = 0;
+
+       /* Do the direct copy and wait for fence response. */
+       ret = amdgpu_ttm_copy_mem_to_mem(adev, write ? &dst : &src, write ? &src : &dst,
+                                       1 << PAGE_SHIFT, amdgpu_bo_encrypted(abo),
+                                       bo->base.resv, &fence);
+       if (!ret && fence) {
+               if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
+                       ret = -ETIMEDOUT;
+
+               dma_fence_put(fence);
+       }
+
+unmap_sg:
+       dma_unmap_sgtable(adev->dev, dst_ttm->sg, DMA_BIDIRECTIONAL, 0);
+release_sg:
+       sg_free_table(dst_ttm->sg);
+free_sg:
+       kfree(dst_ttm->sg);
+       dst_ttm->sg = NULL;
+free_bo:
+       drm_gem_object_put(gobj);
+       return ret;
+}
+
 /**
  * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.
  *
@@ -1765,7 +1850,19 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
        if (bo->resource->mem_type != TTM_PL_VRAM)
                return -EIO;
 
+       /*
+        * Attempt SDMA access over non-visible VRAM first.
+        * On failure, fall back to MMIO access.
+        *
+        * Restrict this to PAGE_SIZE accesses for ptraced memory operations.
+        * Any other access size should use MMIO access.
+        */
        amdgpu_res_first(bo->resource, offset, len, &cursor);
+       if (adev->gmc.visible_vram_size < cursor.start + len && len == PAGE_SIZE &&
+           !amdgpu_in_reset(adev) &&
+           !amdgpu_ttm_access_memory_page_sdma(bo, offset, buf, write))
+               return len;
+
        while (cursor.remaining) {
                size_t count, size = cursor.size;
                loff_t pos = cursor.start;
-- 
2.25.1
