We want to use multiple entities for clears to enable parallelism. To achieve this, let amdgpu_fill_buffer select the entity itself when the caller passes a NULL entity.
We can also simplify the signature and remove the resv param: amdgpu_move_blit is the only caller that doesn't use it, and it's also the only caller that needs to specify an entity. Signed-off-by: Pierre-Eric Pelloux-Prayer <[email protected]> --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 11 +---- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 54 ++++++++++++++++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 1 - 3 files changed, 47 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index be3532134e46..4a69324bb730 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1287,7 +1287,6 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); - struct dma_fence *fence = NULL; struct amdgpu_bo *abo; int r; @@ -1317,18 +1316,12 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) adev->in_suspend || drm_dev_is_unplugged(adev_to_drm(adev))) goto out; - r = dma_resv_reserve_fences(&bo->base._resv, 1); - if (r) - goto out; - - r = amdgpu_fill_buffer(&adev->mman.clear_entities[0], abo, 0, &bo->base._resv, - &fence, NULL, AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE); + r = amdgpu_fill_buffer(NULL, abo, 0, NULL, + NULL, AMDGPU_KERNEL_JOB_ID_CLEAR_ON_RELEASE); if (WARN_ON(r)) goto out; amdgpu_vram_mgr_set_cleared(bo->resource); - dma_resv_add_fence(&bo->base._resv, fence, DMA_RESV_USAGE_KERNEL); - dma_fence_put(fence); out: dma_resv_unlock(&bo->base._resv); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index b94ac16c785b..c357a6d9763a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -184,7 +184,8 @@ static int amdgpu_ttm_map_buffer(struct drm_sched_entity *entity, struct amdgpu_res_cursor *mm_cur, unsigned int window, 
struct amdgpu_ring *ring, bool tmz, uint64_t *size, uint64_t *addr, - struct dma_fence *dep) + struct dma_fence *dep, + struct dma_resv *resv) { struct amdgpu_device *adev = ring->adev; unsigned int offset, num_pages, num_dw, num_bytes; @@ -239,6 +240,10 @@ static int amdgpu_ttm_map_buffer(struct drm_sched_entity *entity, if (dep) drm_sched_job_add_dependency(&job->base, dma_fence_get(dep)); + if (resv) + drm_sched_job_add_resv_dependencies(&job->base, resv, + DMA_RESV_USAGE_BOOKKEEP); + src_addr = num_dw * 4; src_addr += job->ibs[0].gpu_addr; @@ -332,14 +337,14 @@ static int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, r = amdgpu_ttm_map_buffer(&entity->base, src->bo, src->mem, &src_mm, entity->gart_window_id0, ring, tmz, &cur_size, &from, - NULL); + NULL, NULL); if (r) goto error; r = amdgpu_ttm_map_buffer(&entity->base, dst->bo, dst->mem, &dst_mm, entity->gart_window_id1, ring, tmz, &cur_size, &to, - NULL); + NULL, NULL); if (r) goto error; @@ -422,7 +427,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, struct dma_fence *wipe_fence = NULL; r = amdgpu_fill_buffer(&adev->mman.move_entities[0], - abo, 0, NULL, &wipe_fence, fence, + abo, 0, &wipe_fence, fence, AMDGPU_KERNEL_JOB_ID_MOVE_BLIT); if (r) { goto error; @@ -2451,7 +2456,7 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo, r = amdgpu_ttm_map_buffer(&entity->base, &bo->tbo, bo->tbo.resource, &cursor, entity->gart_window_id1, ring, false, &size, &addr, - NULL); + NULL, NULL); if (r) goto err; @@ -2472,10 +2477,21 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo, return r; } +/** + * amdgpu_fill_buffer - fill a buffer with a given value + * @entity: optional entity to use. If NULL, the clearing entities will be + * used to load-balance the partial clears + * @bo: the bo to fill + * @src_data: the value to set + * @f: optional out fence. If @entity is NULL, this must be NULL and the + * fences from each partial clear will be added to the &dma_resv. 
+ * @dependency: optional input dependency fence. + * @k_job_id: trace id + * + */ int amdgpu_fill_buffer(struct amdgpu_ttm_entity *entity, struct amdgpu_bo *bo, uint32_t src_data, - struct dma_resv *resv, struct dma_fence **f, struct dma_fence *dependency, u64 k_job_id) @@ -2483,15 +2499,29 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_entity *entity, struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; struct dma_fence *fence = NULL; + struct dma_resv *resv = NULL; struct amdgpu_res_cursor dst; int r; + /* The fences will be either added to the resv object or the last fence + * will be returned to the caller. In the latter case, all fill jobs will + * be executed on the same ring. + */ + WARN_ON_ONCE((entity && !f) || (!entity && f)); if (!adev->mman.buffer_funcs_enabled) { dev_err(adev->dev, "Trying to clear memory with ring turned off.\n"); return -EINVAL; } + if (!entity) { + entity = &adev->mman.clear_entities[0]; + resv = &bo->tbo.base._resv; + r = dma_resv_reserve_fences(resv, 1); + if (r) + return r; + } + amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &dst); mutex_lock(&entity->gart_window_lock); @@ -2506,7 +2536,8 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_entity *entity, &bo->tbo, bo->tbo.resource, &dst, entity->gart_window_id1, ring, false, &cur_size, &to, - dependency); + dependency, + resv); if (r) goto error; @@ -2516,8 +2547,13 @@ int amdgpu_fill_buffer(struct amdgpu_ttm_entity *entity, if (r) goto error; - dma_fence_put(fence); - fence = next; + if (resv) { + dma_resv_add_fence(resv, next, DMA_RESV_USAGE_KERNEL); + dma_fence_put(next); + } else { + dma_fence_put(fence); + fence = next; + } amdgpu_res_next(&dst, cur_size); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 9dd2a76a5641..38df2b5b4bc7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -184,7 +184,6 @@ int 
amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo, int amdgpu_fill_buffer(struct amdgpu_ttm_entity *entity, struct amdgpu_bo *bo, uint32_t src_data, - struct dma_resv *resv, struct dma_fence **f, struct dma_fence *dependency, u64 k_job_id); -- 2.43.0
