Am 2022-01-28 um 10:16 schrieb Christian König:
We ran into the problem that clearing a really large buffer (60GiB) caused an
SDMA timeout.
Restructure the function to use the dst window instead of mapping the whole
buffer into the GART, and then fill only 2MiB chunks at a time.
Signed-off-by: Christian König
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 200 +---
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 2 +
2 files changed, 114 insertions(+), 88 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 2b0e83e9fa8a..8671ba32fb52 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -296,9 +296,6 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
struct dma_resv *resv,
struct dma_fence **f)
{
- const uint32_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
- AMDGPU_GPU_PAGE_SIZE);
-
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
struct amdgpu_res_cursor src_mm, dst_mm;
struct dma_fence *fence = NULL;
@@ -320,12 +317,15 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
uint32_t cur_size;
uint64_t from, to;
- /* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
-* begins at an offset, then adjust the size accordingly
+ /*
+* Copy size cannot exceed AMDGPU_GTT_MAX_TRANSFER_BYTES. So if
+* src or dst begins at an offset, then adjust the size
+* accordingly
*/
cur_size = max(src_page_offset, dst_page_offset);
cur_size = min(min3(src_mm.size, dst_mm.size, size),
- (uint64_t)(GTT_MAX_BYTES - cur_size));
+ (uint64_t)(AMDGPU_GTT_MAX_TRANSFER_BYTES -
+ cur_size));
/* Map src to window 0 and dst to window 1. */
r = amdgpu_ttm_map_buffer(src->bo, src->mem, &src_mm,
@@ -395,8 +395,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
struct dma_fence *wipe_fence = NULL;
- r = amdgpu_fill_buffer(ttm_to_amdgpu_bo(bo), AMDGPU_POISON,
- NULL, &wipe_fence);
+ r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence);
if (r) {
goto error;
} else if (wipe_fence) {
@@ -1922,19 +1921,51 @@ void amdgpu_ttm_set_buffer_funcs_status(struct
amdgpu_device *adev, bool enable)
adev->mman.buffer_funcs_enabled = enable;
}
+static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
+ bool direct_submit,
+ unsigned int num_dw,
+ struct dma_resv *resv,
+ bool vm_needs_flush,
+ struct amdgpu_job **job)
+{
+ enum amdgpu_ib_pool_type pool = direct_submit ?
+ AMDGPU_IB_POOL_DIRECT :
+ AMDGPU_IB_POOL_DELAYED;
+ int r;
+
+ r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, pool, job);
+ if (r)
+ return r;
+
+ if (vm_needs_flush) {
+ (*job)->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo ?
+ adev->gmc.pdb0_bo :
+ adev->gart.bo);
+ (*job)->vm_needs_flush = true;
+ }
+ if (resv) {
+ r = amdgpu_sync_resv(adev, &(*job)->sync, resv,
+AMDGPU_SYNC_ALWAYS,
+AMDGPU_FENCE_OWNER_UNDEFINED);
+ if (r) {
+ DRM_ERROR("sync failed (%d).\n", r);
+ amdgpu_job_free(*job);
+ return r;
+ }
+ }
+ return 0;
+}
+
int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
uint64_t dst_offset, uint32_t byte_count,
struct dma_resv *resv,
struct dma_fence **fence, bool direct_submit,
bool vm_needs_flush, bool tmz)
{
- enum amdgpu_ib_pool_type pool = direct_submit ? AMDGPU_IB_POOL_DIRECT :
- AMDGPU_IB_POOL_DELAYED;
struct amdgpu_device *adev = ring->adev;
+ unsigned num_loops, num_dw;
struct amdgpu_job *job;
-
uint32_t max_bytes;
- unsigned num_loops, num_dw;
unsigned i;
int r;
@@ -1946,26 +1977,11 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
max_bytes = adev->mman.buffer_funcs->copy_max_bytes