[AMD Official Use Only - AMD Internal Distribution Only]

Reviewed-by: Jesse Zhang <[email protected]>
> -----Original Message-----
> From: amd-gfx <[email protected]> On Behalf Of Alex
> Deucher
> Sent: Tuesday, January 20, 2026 9:34 AM
> To: [email protected]
> Cc: Deucher, Alexander <[email protected]>
> Subject: [PATCH 01/10] drm/amdgpu: re-add the bad job to the pending list for ring resets
>
> Need to re-add the bad job to the pending list before we restart the 
> scheduler.
>
> Signed-off-by: Alex Deucher <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c  | 6 ++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 4 ----
>  2 files changed, 6 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index 1daa9145b217e..ec8d74db62758 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -135,8 +135,14 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
>           ring->funcs->reset) {
>               dev_err(adev->dev, "Starting %s ring reset\n",
>                       s_job->sched->name);
> +             /* Stop the scheduler to prevent anybody else from touching the ring buffer. */
> +             drm_sched_wqueue_stop(&ring->sched);
>               r = amdgpu_ring_reset(ring, job->vmid, job->hw_fence);
>               if (!r) {
> +                     /* add the job back to the pending list */
> +                     list_add(&s_job->list, &s_job->sched->pending_list);
> +                     /* Start the scheduler again */
> +                     drm_sched_wqueue_start(&ring->sched);
>                       atomic_inc(&ring->adev->gpu_reset_counter);
>                       dev_err(adev->dev, "Ring %s reset succeeded\n",
>                               ring->sched.name);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> index b82357c657237..129ad51386535 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> @@ -868,8 +868,6 @@ bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring)
>  void amdgpu_ring_reset_helper_begin(struct amdgpu_ring *ring,
>                                   struct amdgpu_fence *guilty_fence)
>  {
> -     /* Stop the scheduler to prevent anybody else from touching the ring buffer. */
> -     drm_sched_wqueue_stop(&ring->sched);
>       /* back up the non-guilty commands */
>       amdgpu_ring_backup_unprocessed_commands(ring, guilty_fence);
>  }
> @@ -895,8 +893,6 @@ int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring,
>                       amdgpu_ring_write(ring, ring->ring_backup[i]);
>               amdgpu_ring_commit(ring);
>       }
> -     /* Start the scheduler again */
> -     drm_sched_wqueue_start(&ring->sched);
>       return 0;
>  }
>
> --
> 2.52.0

Reply via email to