Need to re-add the bad job to the pending list before we restart the scheduler.
Reviewed-by: Jesse Zhang <[email protected]> Signed-off-by: Alex Deucher <[email protected]> --- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 6 ++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 4 ---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 1daa9145b217e..ec8d74db62758 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -135,8 +135,14 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) ring->funcs->reset) { dev_err(adev->dev, "Starting %s ring reset\n", s_job->sched->name); + /* Stop the scheduler to prevent anybody else from touching the ring buffer. */ + drm_sched_wqueue_stop(&ring->sched); r = amdgpu_ring_reset(ring, job->vmid, job->hw_fence); if (!r) { + /* add the job back to the pending list */ + list_add(&s_job->list, &s_job->sched->pending_list); + /* Start the scheduler again */ + drm_sched_wqueue_start(&ring->sched); atomic_inc(&ring->adev->gpu_reset_counter); dev_err(adev->dev, "Ring %s reset succeeded\n", ring->sched.name); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index b82357c657237..129ad51386535 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -868,8 +868,6 @@ bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring) void amdgpu_ring_reset_helper_begin(struct amdgpu_ring *ring, struct amdgpu_fence *guilty_fence) { - /* Stop the scheduler to prevent anybody else from touching the ring buffer. */ - drm_sched_wqueue_stop(&ring->sched); /* back up the non-guilty commands */ amdgpu_ring_backup_unprocessed_commands(ring, guilty_fence); } @@ -895,8 +893,6 @@ int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring, amdgpu_ring_write(ring, ring->ring_backup[i]); amdgpu_ring_commit(ring); } - /* Start the scheduler again */ - drm_sched_wqueue_start(&ring->sched); return 0; } -- 2.52.0
