because at this time SDMA may be under GPU reset, so its ring->ready
can be false (e.g. the IB test failed during GPU reset). Just keep
going; the GPU scheduler will reschedule this job if it fails.

v2:
treat a not-ready ring as an error only when the GPU is not in GPU reset
handle all such places in amdgpu_ttm.c as well

Signed-off-by: Monk Liu <monk....@amd.com>
Change-Id: I241036e0ba54c3aadc573d507c7bd615b8b978f9
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index e38e6db..9c9e596 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -215,7 +215,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
        case TTM_PL_VRAM:
                if (adev->mman.buffer_funcs &&
                    adev->mman.buffer_funcs_ring &&
-                   adev->mman.buffer_funcs_ring->ready == false) {
+                   (adev->mman.buffer_funcs_ring->ready == false && 
!adev->in_gpu_reset) ) {
                        amdgpu_ttm_placement_from_domain(abo, 
AMDGPU_GEM_DOMAIN_CPU);
                } else if (adev->gmc.visible_vram_size < 
adev->gmc.real_vram_size &&
                           !(abo->flags & 
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
@@ -331,7 +331,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
        const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
                                        AMDGPU_GPU_PAGE_SIZE);
 
-       if (!ring->ready) {
+       if (!ring->ready && !adev->in_gpu_reset) {
                DRM_ERROR("Trying to move memory with ring turned off.\n");
                return -EINVAL;
        }
@@ -579,7 +579,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, 
bool evict,
        }
        if (adev->mman.buffer_funcs == NULL ||
            adev->mman.buffer_funcs_ring == NULL ||
-           !adev->mman.buffer_funcs_ring->ready) {
+           (!adev->mman.buffer_funcs_ring->ready && !adev->in_gpu_reset)) {
                /* use memcpy */
                goto memcpy;
        }
@@ -1656,6 +1656,11 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, 
uint64_t src_offset,
        amdgpu_ring_pad_ib(ring, &job->ibs[0]);
        WARN_ON(job->ibs[0].length_dw > num_dw);
        if (direct_submit) {
+               if (!ring->ready) {
+                       r = -EINVAL;
+                       goto error_free;
+               }
+
                r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs,
                                       NULL, fence);
                job->fence = dma_fence_get(*fence);
@@ -1663,6 +1668,10 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, 
uint64_t src_offset,
                        DRM_ERROR("Error scheduling IBs (%d)\n", r);
                amdgpu_job_free(job);
        } else {
+               if (!ring->ready && !adev->in_gpu_reset) {
+                       r = -EINVAL;
+                       goto error_free;
+               }
                r = amdgpu_job_submit(job, ring, &adev->mman.entity,
                                      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
                if (r)
@@ -1692,7 +1701,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
        struct amdgpu_job *job;
        int r;
 
-       if (!ring->ready) {
+       if (!ring->ready && !adev->in_gpu_reset) {
                DRM_ERROR("Trying to clear memory with ring turned off.\n");
                return -EINVAL;
        }
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to