[AMD Official Use Only - AMD Internal Distribution Only]
Reviewed-by: Jesse Zhang <[email protected]>
> -----Original Message-----
> From: amd-gfx <[email protected]> On Behalf Of Alex
> Deucher
> Sent: Tuesday, January 20, 2026 9:34 AM
> To: [email protected]
> Cc: Deucher, Alexander <[email protected]>
> Subject: [PATCH 01/10] drm/amdgpu: re-add the bad job to the pending list for
> ring
> resets
>
> Need to re-add the bad job to the pending list before we restart the
> scheduler.
>
> Signed-off-by: Alex Deucher <[email protected]>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 6 ++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 4 ----
> 2 files changed, 6 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index 1daa9145b217e..ec8d74db62758 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -135,8 +135,14 @@ static enum drm_gpu_sched_stat
> amdgpu_job_timedout(struct drm_sched_job *s_job)
> ring->funcs->reset) {
> dev_err(adev->dev, "Starting %s ring reset\n",
> s_job->sched->name);
> + /* Stop the scheduler to prevent anybody else from touching the
> ring
> buffer. */
> + drm_sched_wqueue_stop(&ring->sched);
> r = amdgpu_ring_reset(ring, job->vmid, job->hw_fence);
> if (!r) {
> + /* add the job back to the pending list */
> + list_add(&s_job->list, &s_job->sched->pending_list);
> + /* Start the scheduler again */
> + drm_sched_wqueue_start(&ring->sched);
> atomic_inc(&ring->adev->gpu_reset_counter);
> dev_err(adev->dev, "Ring %s reset succeeded\n",
> ring->sched.name);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> index b82357c657237..129ad51386535 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> @@ -868,8 +868,6 @@ bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring)
> void amdgpu_ring_reset_helper_begin(struct amdgpu_ring *ring,
> struct amdgpu_fence *guilty_fence) {
> - /* Stop the scheduler to prevent anybody else from touching the ring
> buffer.
> */
> - drm_sched_wqueue_stop(&ring->sched);
> /* back up the non-guilty commands */
> amdgpu_ring_backup_unprocessed_commands(ring, guilty_fence); } @@ -
> 895,8 +893,6 @@ int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring,
> amdgpu_ring_write(ring, ring->ring_backup[i]);
> amdgpu_ring_commit(ring);
> }
> - /* Start the scheduler again */
> - drm_sched_wqueue_start(&ring->sched);
> return 0;
> }
>
> --
> 2.52.0