On Fri, Jan 23, 2026 at 12:57 PM Alex Deucher <[email protected]> wrote:
>
> Store the start wptr and ib size in the IB fence. On queue
> reset, save the ring contents of all IBs. Since the VM fence
> is a sub-fence of the IB fence, the IB fence stores the
> state for both fences as the IB state encapsulates the VM fence
> state.

This last sentence is wrong as of v6 of this patch.  Fixed up locally.

Alex

>
> For reemit, reemit the entire IB state for non-guilty contexts.
> For guilty contexts, replace the IB submission with nops, but reemit
> the rest.  Special handling is required for the vm fence as that
> likely signalled before the job fence even if the job ultimately
> hung.  Skip the vm state if the vm fence signalled.  Split the reemit
> per fence and when we reemit, update the wptr with the new values
> from reemit.  This allows us to reemit jobs repeatedly as the wptrs get
> properly updated each time.
>
> v2: further simplify the logic
> v3: reemit vm state, not just vm fence
> v4: just nop the IB and possibly the VM portion of the submission
> v5: simplify the vm fence check
> v6: split the vm and ib fences
>
> Reviewed-by: Jesse Zhang <[email protected]>
> Signed-off-by: Alex Deucher <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 98 +++++++++--------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c    | 27 ++++---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c  | 46 +++--------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h  | 20 ++---
>  4 files changed, 73 insertions(+), 118 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> index d48f61076c06a..dcd313e09f379 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> @@ -89,16 +89,6 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
>         return seq;
>  }
>
> -static void amdgpu_fence_save_fence_wptr_start(struct amdgpu_fence *af)
> -{
> -       af->fence_wptr_start = af->ring->wptr;
> -}
> -
> -static void amdgpu_fence_save_fence_wptr_end(struct amdgpu_fence *af)
> -{
> -       af->fence_wptr_end = af->ring->wptr;
> -}
> -
>  /**
>   * amdgpu_fence_emit - emit a fence on the requested ring
>   *
> @@ -126,11 +116,9 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct 
> amdgpu_fence *af,
>                        &ring->fence_drv.lock,
>                        adev->fence_context + ring->idx, seq);
>
> -       amdgpu_fence_save_fence_wptr_start(af);
>         amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
>                                seq, flags | AMDGPU_FENCE_FLAG_INT);
> -       amdgpu_fence_save_fence_wptr_end(af);
> -       amdgpu_fence_save_wptr(af);
> +
>         pm_runtime_get_noresume(adev_to_drm(adev)->dev);
>         ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
>         if (unlikely(rcu_dereference_protected(*ptr, 1))) {
> @@ -241,7 +229,6 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
>
>         do {
>                 struct dma_fence *fence, **ptr;
> -               struct amdgpu_fence *am_fence;
>
>                 ++last_seq;
>                 last_seq &= drv->num_fences_mask;
> @@ -254,12 +241,6 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
>                 if (!fence)
>                         continue;
>
> -               /* Save the wptr in the fence driver so we know what the last 
> processed
> -                * wptr was.  This is required for re-emitting the ring state 
> for
> -                * queues that are reset but are not guilty and thus have no 
> guilty fence.
> -                */
> -               am_fence = container_of(fence, struct amdgpu_fence, base);
> -               drv->signalled_wptr = am_fence->wptr;
>                 dma_fence_signal(fence);
>                 dma_fence_put(fence);
>                 pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
> @@ -727,25 +708,28 @@ void amdgpu_fence_driver_force_completion(struct 
> amdgpu_ring *ring,
>   */
>
>  /**
> - * amdgpu_fence_driver_update_timedout_fence_state - Update fence state and 
> set errors
> + * amdgpu_ring_set_fence_errors_and_reemit - Set dma_fence errors and reemit
>   *
> - * @af: fence of the ring to update
> + * @guilty_fence: fence of the ring to update
>   *
>   */
> -void amdgpu_fence_driver_update_timedout_fence_state(struct amdgpu_fence *af)
> +void amdgpu_ring_set_fence_errors_and_reemit(struct amdgpu_ring *ring,
> +                                            struct amdgpu_fence 
> *guilty_fence)
>  {
>         struct dma_fence *unprocessed;
>         struct dma_fence __rcu **ptr;
>         struct amdgpu_fence *fence;
> -       struct amdgpu_ring *ring = af->ring;
>         unsigned long flags;
>         u32 seq, last_seq;
> -       bool reemitted = false;
> +       unsigned int i;
> +       bool is_guilty_fence;
> +       bool is_guilty_context;
>
>         last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask;
>         seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;
>
> -       /* mark all fences from the guilty context with an error */
> +       ring->reemit = true;
> +       amdgpu_ring_alloc(ring, ring->ring_backup_entries_to_copy);
>         spin_lock_irqsave(&ring->fence_drv.lock, flags);
>         do {
>                 last_seq++;
> @@ -757,39 +741,45 @@ void 
> amdgpu_fence_driver_update_timedout_fence_state(struct amdgpu_fence *af)
>
>                 if (unprocessed && 
> !dma_fence_is_signaled_locked(unprocessed)) {
>                         fence = container_of(unprocessed, struct 
> amdgpu_fence, base);
> +                       is_guilty_fence = fence == guilty_fence;
> +                       is_guilty_context = fence->context == 
> guilty_fence->context;
>
> -                       if (fence->reemitted > 1)
> -                               reemitted = true;
> -                       else if (fence == af)
> +                       /* mark all fences from the guilty context with an 
> error */
> +                       if (is_guilty_fence)
>                                 dma_fence_set_error(&fence->base, -ETIME);
> -                       else if (fence->context == af->context)
> +                       else if (is_guilty_context)
>                                 dma_fence_set_error(&fence->base, -ECANCELED);
> +
> +                       /* reemit the packet stream and update wptrs */
> +                       fence->ib_wptr = ring->wptr;
> +                       for (i = 0; i < fence->ib_dw_size; i++) {
> +                               /* Skip the IB(s) for the guilty context. */
> +                               if (is_guilty_context &&
> +                                   i >= fence->skip_ib_dw_start_offset &&
> +                                   i < fence->skip_ib_dw_end_offset)
> +                                       amdgpu_ring_write(ring, 
> ring->funcs->nop);
> +                               else
> +                                       amdgpu_ring_write(ring,
> +                                                         
> ring->ring_backup[fence->backup_idx + i]);
> +                       }
>                 }
>                 rcu_read_unlock();
>         } while (last_seq != seq);
>         spin_unlock_irqrestore(&ring->fence_drv.lock, flags);
> -
> -       if (reemitted) {
> -               /* if we've already reemitted once then just cancel 
> everything */
> -               amdgpu_fence_driver_force_completion(af->ring, &af->base);
> -               af->ring->ring_backup_entries_to_copy = 0;
> -       }
> -}
> -
> -void amdgpu_fence_save_wptr(struct amdgpu_fence *af)
> -{
> -       af->wptr = af->ring->wptr;
> +       amdgpu_ring_commit(ring);
> +       ring->reemit = false;
>  }
>
>  static void amdgpu_ring_backup_unprocessed_command(struct amdgpu_ring *ring,
> -                                                  u64 start_wptr, u64 
> end_wptr)
> +                                                  struct amdgpu_fence *af)
>  {
> -       unsigned int first_idx = start_wptr & ring->buf_mask;
> -       unsigned int last_idx = end_wptr & ring->buf_mask;
> +       unsigned int first_idx = af->ib_wptr & ring->buf_mask;
> +       unsigned int dw_size = af->ib_dw_size;
>         unsigned int i;
>
> +       af->backup_idx = ring->ring_backup_entries_to_copy;
>         /* Backup the contents of the ring buffer. */
> -       for (i = first_idx; i != last_idx; ++i, i &= ring->buf_mask)
> +       for (i = first_idx; dw_size > 0; ++i, i &= ring->buf_mask, --dw_size)
>                 ring->ring_backup[ring->ring_backup_entries_to_copy++] = 
> ring->ring[i];
>  }
>
> @@ -799,12 +789,10 @@ void amdgpu_ring_backup_unprocessed_commands(struct 
> amdgpu_ring *ring,
>         struct dma_fence *unprocessed;
>         struct dma_fence __rcu **ptr;
>         struct amdgpu_fence *fence;
> -       u64 wptr;
>         u32 seq, last_seq;
>
>         last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask;
>         seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;
> -       wptr = ring->fence_drv.signalled_wptr;
>         ring->ring_backup_entries_to_copy = 0;
>
>         do {
> @@ -818,21 +806,7 @@ void amdgpu_ring_backup_unprocessed_commands(struct 
> amdgpu_ring *ring,
>                 if (unprocessed && !dma_fence_is_signaled(unprocessed)) {
>                         fence = container_of(unprocessed, struct 
> amdgpu_fence, base);
>
> -                       /* save everything if the ring is not guilty, 
> otherwise
> -                        * just save the content from other contexts.
> -                        */
> -                       if (!fence->reemitted &&
> -                           (!guilty_fence || (fence->context != 
> guilty_fence->context))) {
> -                               amdgpu_ring_backup_unprocessed_command(ring, 
> wptr,
> -                                                                      
> fence->wptr);
> -                       } else if (!fence->reemitted) {
> -                               /* always save the fence */
> -                               amdgpu_ring_backup_unprocessed_command(ring,
> -                                                                      
> fence->fence_wptr_start,
> -                                                                      
> fence->fence_wptr_end);
> -                       }
> -                       wptr = fence->wptr;
> -                       fence->reemitted++;
> +                       amdgpu_ring_backup_unprocessed_command(ring, fence);
>                 }
>                 rcu_read_unlock();
>         } while (last_seq != seq);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index fc02756673860..2ff22a8bd7b07 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -126,6 +126,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, struct 
> amdgpu_job *job,
>         struct amdgpu_ib *ib;
>         struct dma_fence *tmp = NULL;
>         struct amdgpu_fence *af;
> +       struct amdgpu_fence *vm_af;
>         bool need_ctx_switch;
>         uint64_t fence_ctx;
>         uint32_t status = 0, alloc_size;
> @@ -135,7 +136,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, struct 
> amdgpu_job *job,
>         bool need_pipe_sync = false;
>         unsigned int cond_exec;
>         unsigned int i;
> -       int r = 0;
> +       int r = 0, count_dw;
>
>         if (!job)
>                 return -EINVAL;
> @@ -146,13 +147,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, struct 
> amdgpu_job *job,
>         fence_ctx = job->base.s_fence ?
>                 job->base.s_fence->finished.context : 0;
>         af = job->hw_fence;
> +       vm_af = job->hw_vm_fence;
>         /* Save the context of the job for reset handling.
>          * The driver needs this so it can skip the ring
>          * contents for guilty contexts.
>          */
>         af->context = fence_ctx;
>         /* the vm fence is also part of the job's context */
> -       job->hw_vm_fence->context = fence_ctx;
> +       vm_af->context = fence_ctx;
>
>         if (!ring->sched.ready) {
>                 dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", 
> ring->name);
> @@ -192,11 +194,19 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, struct 
> amdgpu_job *job,
>                 dma_fence_put(tmp);
>         }
>
> +       /* VM sequence */
> +       vm_af->ib_wptr = ring->wptr;
> +       count_dw = ring->count_dw;
>         r = amdgpu_vm_flush(ring, job, need_pipe_sync);
>         if (r) {
>                 amdgpu_ring_undo(ring);
>                 return r;
>         }
> +       vm_af->ib_dw_size = count_dw - ring->count_dw;
> +
> +       /* IB sequence */
> +       af->ib_wptr = ring->wptr;
> +       count_dw = ring->count_dw;
>
>         amdgpu_ring_ib_begin(ring);
>
> @@ -218,6 +228,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, struct 
> amdgpu_job *job,
>                 cond_exec = amdgpu_ring_init_cond_exec(ring,
>                                                        
> ring->cond_exe_gpu_addr);
>
> +       /* Skip the IB for guilty contexts */
> +       af->skip_ib_dw_start_offset = count_dw - ring->count_dw;
>         amdgpu_device_flush_hdp(adev, ring);
>
>         if (need_ctx_switch)
> @@ -256,6 +268,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, struct 
> amdgpu_job *job,
>                 amdgpu_ring_emit_frame_cntl(ring, false, secure);
>
>         amdgpu_device_invalidate_hdp(adev, ring);
> +       /* Skip the IB for guilty contexts */
> +       af->skip_ib_dw_end_offset = count_dw - ring->count_dw;
>
>         if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE)
>                 fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY;
> @@ -297,13 +311,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, struct 
> amdgpu_job *job,
>                 ring->funcs->emit_wave_limit(ring, false);
>
>         amdgpu_ring_ib_end(ring);
> -       /* Save the wptr associated with this fence.
> -        * This must be last for resets to work properly
> -        * as we need to save the wptr associated with this
> -        * fence so we know what rings contents to backup
> -        * after we reset the queue.
> -        */
> -       amdgpu_fence_save_wptr(af);
> +
> +       af->ib_dw_size = count_dw - ring->count_dw;
>
>         amdgpu_ring_commit(ring);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> index 129ad51386535..83750ab4e81b5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> @@ -90,10 +90,13 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned 
> int ndw)
>         ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
>
>         /* Make sure we aren't trying to allocate more space
> -        * than the maximum for one submission
> +        * than the maximum for one submission.  Skip for reemit
> +        * since we may be reemitting several submissions.
>          */
> -       if (WARN_ON_ONCE(ndw > ring->max_dw))
> -               return -ENOMEM;
> +       if (!ring->reemit) {
> +               if (WARN_ON_ONCE(ndw > ring->max_dw))
> +                       return -ENOMEM;
> +       }
>
>         ring->count_dw = ndw;
>         ring->wptr_old = ring->wptr;
> @@ -104,29 +107,6 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned 
> int ndw)
>         return 0;
>  }
>
> -/**
> - * amdgpu_ring_alloc_reemit - allocate space on the ring buffer for reemit
> - *
> - * @ring: amdgpu_ring structure holding ring information
> - * @ndw: number of dwords to allocate in the ring buffer
> - *
> - * Allocate @ndw dwords in the ring buffer (all asics).
> - * doesn't check the max_dw limit as we may be reemitting
> - * several submissions.
> - */
> -static void amdgpu_ring_alloc_reemit(struct amdgpu_ring *ring, unsigned int 
> ndw)
> -{
> -       /* Align requested size with padding so unlock_commit can
> -        * pad safely */
> -       ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
> -
> -       ring->count_dw = ndw;
> -       ring->wptr_old = ring->wptr;
> -
> -       if (ring->funcs->begin_use)
> -               ring->funcs->begin_use(ring);
> -}
> -
>  /**
>   * amdgpu_ring_insert_nop - insert NOP packets
>   *
> @@ -875,7 +855,6 @@ void amdgpu_ring_reset_helper_begin(struct amdgpu_ring 
> *ring,
>  int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring,
>                                  struct amdgpu_fence *guilty_fence)
>  {
> -       unsigned int i;
>         int r;
>
>         /* verify that the ring is functional */
> @@ -883,16 +862,9 @@ int amdgpu_ring_reset_helper_end(struct amdgpu_ring 
> *ring,
>         if (r)
>                 return r;
>
> -       /* set an error on all fences from the context */
> -       if (guilty_fence)
> -               amdgpu_fence_driver_update_timedout_fence_state(guilty_fence);
> -       /* Re-emit the non-guilty commands */
> -       if (ring->ring_backup_entries_to_copy) {
> -               amdgpu_ring_alloc_reemit(ring, 
> ring->ring_backup_entries_to_copy);
> -               for (i = 0; i < ring->ring_backup_entries_to_copy; i++)
> -                       amdgpu_ring_write(ring, ring->ring_backup[i]);
> -               amdgpu_ring_commit(ring);
> -       }
> +       /* set an error on all fences from the context and reemit */
> +       amdgpu_ring_set_fence_errors_and_reemit(ring, guilty_fence);
> +
>         return 0;
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index ce095427611fb..9231896d603d8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -121,7 +121,6 @@ struct amdgpu_fence_driver {
>         /* sync_seq is protected by ring emission lock */
>         uint32_t                        sync_seq;
>         atomic_t                        last_seq;
> -       u64                             signalled_wptr;
>         bool                            initialized;
>         struct amdgpu_irq_src           *irq_src;
>         unsigned                        irq_type;
> @@ -146,15 +145,15 @@ struct amdgpu_fence {
>         struct amdgpu_ring              *ring;
>         ktime_t                         start_timestamp;
>
> -       /* wptr for the total submission for resets */
> -       u64                             wptr;
> +       /* location and size of the IB */
> +       u64                             ib_wptr;
> +       unsigned int                    ib_dw_size;
> +       unsigned int                    skip_ib_dw_start_offset;
> +       unsigned int                    skip_ib_dw_end_offset;
>         /* fence context for resets */
>         u64                             context;
> -       /* has this fence been reemitted */
> -       unsigned int                    reemitted;
> -       /* wptr for the fence for the submission */
> -       u64                             fence_wptr_start;
> -       u64                             fence_wptr_end;
> +       /* idx for ring backups */
> +       unsigned int                    backup_idx;
>  };
>
>  extern const struct drm_sched_backend_ops amdgpu_sched_ops;
> @@ -162,8 +161,8 @@ extern const struct drm_sched_backend_ops 
> amdgpu_sched_ops;
>  void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error);
>  void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring,
>                                           struct dma_fence *timedout_fence);
> -void amdgpu_fence_driver_update_timedout_fence_state(struct amdgpu_fence 
> *af);
> -void amdgpu_fence_save_wptr(struct amdgpu_fence *af);
> +void amdgpu_ring_set_fence_errors_and_reemit(struct amdgpu_ring *ring,
> +                                            struct amdgpu_fence 
> *guilty_fence);
>
>  int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
>  int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
> @@ -314,6 +313,7 @@ struct amdgpu_ring {
>         /* backups for resets */
>         uint32_t                *ring_backup;
>         unsigned int            ring_backup_entries_to_copy;
> +       bool                    reemit;
>         unsigned                rptr_offs;
>         u64                     rptr_gpu_addr;
>         u32                     *rptr_cpu_addr;
> --
> 2.52.0
>

Reply via email to