[AMD Official Use Only - AMD Internal Distribution Only]

Reviewed-by: Jesse Zhang <[email protected]>

> -----Original Message-----
> From: amd-gfx <[email protected]> On Behalf Of Alex
> Deucher
> Sent: Wednesday, January 21, 2026 11:01 AM
> To: [email protected]
> Cc: Deucher, Alexander <[email protected]>
> Subject: [PATCH 10/10] drm/amdgpu: rework ring reset backup and reemit v4
>
> Store the start wptr and ib size in the IB fence. On queue reset, save the 
> ring
> contents of all IBs. Since the VM fence is a sub-fence of the the IB fence, 
> the IB
> fence stores the state for both fences as the IB state encapsulates the VM 
> fence
> state.
>
> For reemit, reemit the entire IB state for non-guilty contexts.
> For guilty contexts, replace the IB submission with nops, but reemit the rest.
> Special handling is required for the vm fence as that likely signalled before 
> the job
> fence even if the job ultimately hung.  Skip the vm state if the vm fence 
> signalled.
> Split the reemit per fence and when we reemit, update the wptr with the new 
> values
> from reemit.  This allows us to reemit jobs repeatedly as the wptrs get 
> properly
> updated each time.
>
> v2: further simplify the logic
> v3: reemit vm state, not just vm fence
> v4: just nop the IB and possibly the VM portion of the submission
>
> Signed-off-by: Alex Deucher <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 108 +++++++++-------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c    |  20 ++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c  |  46 ++-------
> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h  |  24 +++--
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |   2 +
>  5 files changed, 83 insertions(+), 117 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> index d48f61076c06a..68642aab43177 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> @@ -89,16 +89,6 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
>       return seq;
>  }
>
> -static void amdgpu_fence_save_fence_wptr_start(struct amdgpu_fence *af) -{
> -     af->fence_wptr_start = af->ring->wptr;
> -}
> -
> -static void amdgpu_fence_save_fence_wptr_end(struct amdgpu_fence *af) -{
> -     af->fence_wptr_end = af->ring->wptr;
> -}
> -
>  /**
>   * amdgpu_fence_emit - emit a fence on the requested ring
>   *
> @@ -126,11 +116,9 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct
> amdgpu_fence *af,
>                      &ring->fence_drv.lock,
>                      adev->fence_context + ring->idx, seq);
>
> -     amdgpu_fence_save_fence_wptr_start(af);
>       amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
>                              seq, flags | AMDGPU_FENCE_FLAG_INT);
> -     amdgpu_fence_save_fence_wptr_end(af);
> -     amdgpu_fence_save_wptr(af);
> +
>       pm_runtime_get_noresume(adev_to_drm(adev)->dev);
>       ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
>       if (unlikely(rcu_dereference_protected(*ptr, 1))) { @@ -241,7 +229,6 @@
> bool amdgpu_fence_process(struct amdgpu_ring *ring)
>
>       do {
>               struct dma_fence *fence, **ptr;
> -             struct amdgpu_fence *am_fence;
>
>               ++last_seq;
>               last_seq &= drv->num_fences_mask;
> @@ -254,12 +241,6 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
>               if (!fence)
>                       continue;
>
> -             /* Save the wptr in the fence driver so we know what the last
> processed
> -              * wptr was.  This is required for re-emitting the ring state 
> for
> -              * queues that are reset but are not guilty and thus have no 
> guilty
> fence.
> -              */
> -             am_fence = container_of(fence, struct amdgpu_fence, base);
> -             drv->signalled_wptr = am_fence->wptr;
>               dma_fence_signal(fence);
>               dma_fence_put(fence);
>               pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
> @@ -727,25 +708,28 @@ void amdgpu_fence_driver_force_completion(struct
> amdgpu_ring *ring,
>   */
>
>  /**
> - * amdgpu_fence_driver_update_timedout_fence_state - Update fence state and
> set errors
> + * amdgpu_ring_set_fence_errors_and_reemit - Set dma_fence errors and
> + reemit
>   *
> - * @af: fence of the ring to update
> + * @guilty_fence: fence of the ring to update
>   *
>   */
> -void amdgpu_fence_driver_update_timedout_fence_state(struct amdgpu_fence
> *af)
> +void amdgpu_ring_set_fence_errors_and_reemit(struct amdgpu_ring *ring,
> +                                          struct amdgpu_fence *guilty_fence)
>  {
>       struct dma_fence *unprocessed;
>       struct dma_fence __rcu **ptr;
>       struct amdgpu_fence *fence;
> -     struct amdgpu_ring *ring = af->ring;
>       unsigned long flags;
>       u32 seq, last_seq;
> -     bool reemitted = false;
> +     unsigned int i;
> +     bool is_guilty_fence;
> +     bool is_guilty_context;
>
>       last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask;
>       seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;
>
> -     /* mark all fences from the guilty context with an error */
> +     ring->reemit = true;
> +     amdgpu_ring_alloc(ring, ring->ring_backup_entries_to_copy);
>       spin_lock_irqsave(&ring->fence_drv.lock, flags);
>       do {
>               last_seq++;
> @@ -757,39 +741,53 @@ void
> amdgpu_fence_driver_update_timedout_fence_state(struct amdgpu_fence *af)
>
>               if (unprocessed && !dma_fence_is_signaled_locked(unprocessed)) {
>                       fence = container_of(unprocessed, struct amdgpu_fence,
> base);
> +                     is_guilty_fence = fence == guilty_fence;
> +                     is_guilty_context = fence->context == 
> guilty_fence->context;
>
> -                     if (fence->reemitted > 1)
> -                             reemitted = true;
> -                     else if (fence == af)
> +                     /* mark all fences from the guilty context with an 
> error */
> +                     if (is_guilty_fence)
>                               dma_fence_set_error(&fence->base, -ETIME);
> -                     else if (fence->context == af->context)
> +                     else if (is_guilty_context)
>                               dma_fence_set_error(&fence->base, -ECANCELED);
> +
> +                     /* Non-vm fence has all the state. */
> +                     if (!fence->is_vm_fence) {
> +                             /* reemit the packet stream and update wptrs */
> +                             fence->ib_wptr = ring->wptr;
> +                             for (i = 0; i < fence->ib_dw_size; i++) {
> +                                     /* Skip the IB(s) for the guilty 
> context. */
> +                                     if (is_guilty_fence && fence->vm_fence 
> &&
> +                                         dma_fence_is_signaled_locked(&fence-
> >vm_fence->base) &&
> +                                         i >= fence->skip_vm_dw_start_offset 
> &&
> +                                         i < fence->skip_vm_dw_end_offset)
> +                                             amdgpu_ring_write(ring, 
> ring->funcs-
> >nop);
> +                                     else if (is_guilty_context &&
> +                                              i >= 
> fence->skip_ib_dw_start_offset
> &&
> +                                              i < 
> fence->skip_ib_dw_end_offset)
> +                                             amdgpu_ring_write(ring, 
> ring->funcs-
> >nop);
> +                                     else
> +                                             amdgpu_ring_write(ring,
> +                                                               ring-
> >ring_backup[fence->backup_idx + i]);
> +                             }
> +                     }
>               }
>               rcu_read_unlock();
>       } while (last_seq != seq);
>       spin_unlock_irqrestore(&ring->fence_drv.lock, flags);
> -
> -     if (reemitted) {
> -             /* if we've already reemitted once then just cancel everything 
> */
> -             amdgpu_fence_driver_force_completion(af->ring, &af->base);
> -             af->ring->ring_backup_entries_to_copy = 0;
> -     }
> -}
> -
> -void amdgpu_fence_save_wptr(struct amdgpu_fence *af) -{
> -     af->wptr = af->ring->wptr;
> +     amdgpu_ring_commit(ring);
> +     ring->reemit = false;
>  }
>
>  static void amdgpu_ring_backup_unprocessed_command(struct amdgpu_ring
> *ring,
> -                                                u64 start_wptr, u64 end_wptr)
> +                                                struct amdgpu_fence *af)
>  {
> -     unsigned int first_idx = start_wptr & ring->buf_mask;
> -     unsigned int last_idx = end_wptr & ring->buf_mask;
> +     unsigned int first_idx = af->ib_wptr & ring->buf_mask;
> +     unsigned int dw_size = af->ib_dw_size;
>       unsigned int i;
>
> +     af->backup_idx = ring->ring_backup_entries_to_copy;
>       /* Backup the contents of the ring buffer. */
> -     for (i = first_idx; i != last_idx; ++i, i &= ring->buf_mask)
> +     for (i = first_idx; dw_size > 0; ++i, i &= ring->buf_mask, --dw_size)
>               ring->ring_backup[ring->ring_backup_entries_to_copy++] = ring-
> >ring[i];  }
>
> @@ -799,12 +797,10 @@ void
> amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
>       struct dma_fence *unprocessed;
>       struct dma_fence __rcu **ptr;
>       struct amdgpu_fence *fence;
> -     u64 wptr;
>       u32 seq, last_seq;
>
>       last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask;
>       seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;
> -     wptr = ring->fence_drv.signalled_wptr;
>       ring->ring_backup_entries_to_copy = 0;
>
>       do {
> @@ -818,21 +814,9 @@ void
> amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
>               if (unprocessed && !dma_fence_is_signaled(unprocessed)) {
>                       fence = container_of(unprocessed, struct amdgpu_fence,
> base);
>
> -                     /* save everything if the ring is not guilty, otherwise
> -                      * just save the content from other contexts.
> -                      */
> -                     if (!fence->reemitted &&
> -                         (!guilty_fence || (fence->context != 
> guilty_fence->context)))
> {
> -                             amdgpu_ring_backup_unprocessed_command(ring,
> wptr,
> -                                                                    
> fence->wptr);
> -                     } else if (!fence->reemitted) {
> -                             /* always save the fence */
> -                             amdgpu_ring_backup_unprocessed_command(ring,
> -                                                                    fence-
> >fence_wptr_start,
> -                                                                    fence-
> >fence_wptr_end);
> -                     }
> -                     wptr = fence->wptr;
> -                     fence->reemitted++;
> +                     /* Non-vm fence has all the state. */
> +                     if (!fence->is_vm_fence)
> +                             amdgpu_ring_backup_unprocessed_command(ring,
> fence);
>               }
>               rcu_read_unlock();
>       } while (last_seq != seq);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index 78987ecdfe03a..3e031e9e73e64 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -135,7 +135,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, struct
> amdgpu_job *job,
>       bool need_pipe_sync = false;
>       unsigned int cond_exec;
>       unsigned int i;
> -     int r = 0;
> +     int r = 0, count_dw;
>
>       if (!job)
>               return -EINVAL;
> @@ -178,6 +178,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, struct
> amdgpu_job *job,
>               dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
>               return r;
>       }
> +     af->ib_wptr = ring->wptr;
> +     count_dw = ring->count_dw;
>
>       need_ctx_switch = ring->current_ctx != fence_ctx;
>       if (ring->funcs->emit_pipeline_sync && job && @@ -202,11 +204,15 @@ int
> amdgpu_ib_schedule(struct amdgpu_ring *ring, struct amdgpu_job *job,
>       if (ring->funcs->insert_start)
>               ring->funcs->insert_start(ring);
>
> +     /* Skip the VM for guilty contexts */
> +     af->skip_vm_dw_start_offset = count_dw - ring->count_dw;
>       r = amdgpu_vm_flush(ring, job, need_pipe_sync);
>       if (r) {
>               amdgpu_ring_undo(ring);
>               return r;
>       }
> +     /* Skip the IB for guilty contexts */
> +     af->skip_vm_dw_end_offset = count_dw - ring->count_dw;
>
>       amdgpu_ring_ib_begin(ring);
>
> @@ -218,6 +224,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, struct
> amdgpu_job *job,
>               cond_exec = amdgpu_ring_init_cond_exec(ring,
>                                                      ring->cond_exe_gpu_addr);
>
> +     /* Skip the IB for guilty contexts */
> +     af->skip_ib_dw_start_offset = count_dw - ring->count_dw;
>       amdgpu_device_flush_hdp(adev, ring);
>
>       if (need_ctx_switch)
> @@ -256,6 +264,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, struct
> amdgpu_job *job,
>               amdgpu_ring_emit_frame_cntl(ring, false, secure);
>
>       amdgpu_device_invalidate_hdp(adev, ring);
> +     /* Skip the IB for guilty contexts */
> +     af->skip_ib_dw_end_offset = count_dw - ring->count_dw;
>
>       if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE)
>               fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY; @@ -
> 296,13 +306,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, struct
> amdgpu_job *job,
>           ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
>               ring->funcs->emit_wave_limit(ring, false);
>
> -     /* Save the wptr associated with this fence.
> -      * This must be last for resets to work properly
> -      * as we need to save the wptr associated with this
> -      * fence so we know what rings contents to backup
> -      * after we reset the queue.
> -      */
> -     amdgpu_fence_save_wptr(af);
> +     af->ib_dw_size = count_dw - ring->count_dw;
>
>       amdgpu_ring_ib_end(ring);
>       amdgpu_ring_commit(ring);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> index 129ad51386535..83750ab4e81b5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> @@ -90,10 +90,13 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned
> int ndw)
>       ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
>
>       /* Make sure we aren't trying to allocate more space
> -      * than the maximum for one submission
> +      * than the maximum for one submission.  Skip for reemit
> +      * since we may be reemitting several submissions.
>        */
> -     if (WARN_ON_ONCE(ndw > ring->max_dw))
> -             return -ENOMEM;
> +     if (!ring->reemit) {
> +             if (WARN_ON_ONCE(ndw > ring->max_dw))
> +                     return -ENOMEM;
> +     }
>
>       ring->count_dw = ndw;
>       ring->wptr_old = ring->wptr;
> @@ -104,29 +107,6 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned
> int ndw)
>       return 0;
>  }
>
> -/**
> - * amdgpu_ring_alloc_reemit - allocate space on the ring buffer for reemit
> - *
> - * @ring: amdgpu_ring structure holding ring information
> - * @ndw: number of dwords to allocate in the ring buffer
> - *
> - * Allocate @ndw dwords in the ring buffer (all asics).
> - * doesn't check the max_dw limit as we may be reemitting
> - * several submissions.
> - */
> -static void amdgpu_ring_alloc_reemit(struct amdgpu_ring *ring, unsigned int 
> ndw) -
> {
> -     /* Align requested size with padding so unlock_commit can
> -      * pad safely */
> -     ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
> -
> -     ring->count_dw = ndw;
> -     ring->wptr_old = ring->wptr;
> -
> -     if (ring->funcs->begin_use)
> -             ring->funcs->begin_use(ring);
> -}
> -
>  /**
>   * amdgpu_ring_insert_nop - insert NOP packets
>   *
> @@ -875,7 +855,6 @@ void amdgpu_ring_reset_helper_begin(struct amdgpu_ring
> *ring,  int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring,
>                                struct amdgpu_fence *guilty_fence)  {
> -     unsigned int i;
>       int r;
>
>       /* verify that the ring is functional */ @@ -883,16 +862,9 @@ int
> amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring,
>       if (r)
>               return r;
>
> -     /* set an error on all fences from the context */
> -     if (guilty_fence)
> -             amdgpu_fence_driver_update_timedout_fence_state(guilty_fence);
> -     /* Re-emit the non-guilty commands */
> -     if (ring->ring_backup_entries_to_copy) {
> -             amdgpu_ring_alloc_reemit(ring, 
> ring->ring_backup_entries_to_copy);
> -             for (i = 0; i < ring->ring_backup_entries_to_copy; i++)
> -                     amdgpu_ring_write(ring, ring->ring_backup[i]);
> -             amdgpu_ring_commit(ring);
> -     }
> +     /* set an error on all fences from the context and reemit */
> +     amdgpu_ring_set_fence_errors_and_reemit(ring, guilty_fence);
> +
>       return 0;
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index ce095427611fb..c6f674c457620 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -121,7 +121,6 @@ struct amdgpu_fence_driver {
>       /* sync_seq is protected by ring emission lock */
>       uint32_t                        sync_seq;
>       atomic_t                        last_seq;
> -     u64                             signalled_wptr;
>       bool                            initialized;
>       struct amdgpu_irq_src           *irq_src;
>       unsigned                        irq_type;
> @@ -146,15 +145,19 @@ struct amdgpu_fence {
>       struct amdgpu_ring              *ring;
>       ktime_t                         start_timestamp;
>
> -     /* wptr for the total submission for resets */
> -     u64                             wptr;
> +     bool                            is_vm_fence;
> +     /* location and size of the IB */
> +     u64                             ib_wptr;
> +     unsigned int                    ib_dw_size;
> +     unsigned int                    skip_vm_dw_start_offset;
> +     unsigned int                    skip_vm_dw_end_offset;
> +     unsigned int                    skip_ib_dw_start_offset;
> +     unsigned int                    skip_ib_dw_end_offset;
>       /* fence context for resets */
>       u64                             context;
> -     /* has this fence been reemitted */
> -     unsigned int                    reemitted;
> -     /* wptr for the fence for the submission */
> -     u64                             fence_wptr_start;
> -     u64                             fence_wptr_end;
> +     /* idx for ring backups */
> +     unsigned int                    backup_idx;
> +     struct amdgpu_fence             *vm_fence;
>  };
>
>  extern const struct drm_sched_backend_ops amdgpu_sched_ops; @@ -162,8
> +165,8 @@ extern const struct drm_sched_backend_ops amdgpu_sched_ops;
> void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error);  void
> amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring,
>                                         struct dma_fence *timedout_fence); 
> -void
> amdgpu_fence_driver_update_timedout_fence_state(struct amdgpu_fence *af); -
> void amdgpu_fence_save_wptr(struct amdgpu_fence *af);
> +void amdgpu_ring_set_fence_errors_and_reemit(struct amdgpu_ring *ring,
> +                                          struct amdgpu_fence *guilty_fence);
>
>  int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);  int
> amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, @@ -314,6 +317,7 @@
> struct amdgpu_ring {
>       /* backups for resets */
>       uint32_t                *ring_backup;
>       unsigned int            ring_backup_entries_to_copy;
> +     bool                    reemit;
>       unsigned                rptr_offs;
>       u64                     rptr_gpu_addr;
>       u32                     *rptr_cpu_addr;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 6a2ea200d90c8..b7518eb4f05c2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -848,6 +848,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct
> amdgpu_job *job,
>               r = amdgpu_fence_emit(ring, job->hw_vm_fence, 0);
>               if (r)
>                       return r;
> +             job->hw_vm_fence->is_vm_fence = true;
> +             job->hw_fence->vm_fence = job->hw_vm_fence;
>               fence = &job->hw_vm_fence->base;
>               /* get a ref for the job */
>               dma_fence_get(fence);
> --
> 2.52.0

Reply via email to