Need to re-add the bad job to the pending list before we
restart the scheduler.

Reviewed-by: Jesse Zhang <[email protected]>
Signed-off-by: Alex Deucher <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c  | 6 ++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 4 ----
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 1daa9145b217e..ec8d74db62758 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -135,8 +135,14 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct 
drm_sched_job *s_job)
            ring->funcs->reset) {
                dev_err(adev->dev, "Starting %s ring reset\n",
                        s_job->sched->name);
+               /* Stop the scheduler to prevent anybody else from touching the 
ring buffer. */
+               drm_sched_wqueue_stop(&ring->sched);
                r = amdgpu_ring_reset(ring, job->vmid, job->hw_fence);
                if (!r) {
+                       /* add the job back to the pending list */
+                       list_add(&s_job->list, &s_job->sched->pending_list);
+                       /* Start the scheduler again */
+                       drm_sched_wqueue_start(&ring->sched);
                        atomic_inc(&ring->adev->gpu_reset_counter);
                        dev_err(adev->dev, "Ring %s reset succeeded\n",
                                ring->sched.name);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index b82357c657237..129ad51386535 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -868,8 +868,6 @@ bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring)
 void amdgpu_ring_reset_helper_begin(struct amdgpu_ring *ring,
                                    struct amdgpu_fence *guilty_fence)
 {
-       /* Stop the scheduler to prevent anybody else from touching the ring 
buffer. */
-       drm_sched_wqueue_stop(&ring->sched);
        /* back up the non-guilty commands */
        amdgpu_ring_backup_unprocessed_commands(ring, guilty_fence);
 }
@@ -895,8 +893,6 @@ int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring,
                        amdgpu_ring_write(ring, ring->ring_backup[i]);
                amdgpu_ring_commit(ring);
        }
-       /* Start the scheduler again */
-       drm_sched_wqueue_start(&ring->sched);
        return 0;
 }
 
-- 
2.52.0

Reply via email to