Store the start wptr and IB size in the IB fence.  On queue reset,
save the ring contents of all unprocessed IBs.  Since the VM fence is
a sub-fence of the IB fence, the IB fence stores the state for both
fences, as the IB state encapsulates the VM fence state.
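
Roughly, the per-submission state now tracked in the IB fence looks
like this (a condensed view of the fields added to amdgpu_fence in
amdgpu_ring.h below):

  struct amdgpu_fence {
          ...
          bool                    is_vm_fence;             /* set on the VM sub-fence */
          u64                     ib_wptr;                 /* ring wptr where the submission starts */
          unsigned int            ib_dw_size;              /* dwords emitted for the whole submission */
          unsigned int            skip_vm_dw_start_offset; /* VM flush range, relative to ib_wptr */
          unsigned int            skip_vm_dw_end_offset;
          unsigned int            skip_ib_dw_start_offset; /* IB range, relative to ib_wptr */
          unsigned int            skip_ib_dw_end_offset;
          unsigned int            backup_idx;              /* offset into ring_backup[] */
          struct amdgpu_fence     *vm_fence;               /* VM sub-fence, if any */
  };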

On reemit, replay the entire IB state for non-guilty contexts.  For
guilty contexts, replace the IB submission with NOPs but reemit the
rest.  The VM fence needs special handling since it likely signalled
before the job fence even if the job ultimately hung, so skip the VM
state if the VM fence has already signalled.  Split the reemit per
fence and, when reemitting, update the wptr with the new values so
that jobs can be reemitted repeatedly with the wptrs properly updated
each time.
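
For reference, the per-dword reemit decision ends up roughly like the
following (condensed from amdgpu_ring_set_fence_errors_and_reemit()
in the diff below):

  for (i = 0; i < fence->ib_dw_size; i++) {
          if (is_guilty_fence && fence->vm_fence &&
              dma_fence_is_signaled_locked(&fence->vm_fence->base) &&
              i >= fence->skip_vm_dw_start_offset &&
              i < fence->skip_vm_dw_end_offset)
                  /* the VM flush already signalled, don't redo it */
                  amdgpu_ring_write(ring, ring->funcs->nop);
          else if (is_guilty_context &&
                   i >= fence->skip_ib_dw_start_offset &&
                   i < fence->skip_ib_dw_end_offset)
                  /* drop the guilty IB itself */
                  amdgpu_ring_write(ring, ring->funcs->nop);
          else
                  /* replay the backed-up dword unchanged */
                  amdgpu_ring_write(ring, ring->ring_backup[fence->backup_idx + i]);
  }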

v2: further simplify the logic
v3: reemit vm state, not just vm fence
v4: just nop the IB and possibly the VM portion of the submission

Signed-off-by: Alex Deucher <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 108 +++++++++-------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c    |  20 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c  |  46 ++-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h  |  24 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |   2 +
 5 files changed, 83 insertions(+), 117 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index d48f61076c06a..68642aab43177 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -89,16 +89,6 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
        return seq;
 }
 
-static void amdgpu_fence_save_fence_wptr_start(struct amdgpu_fence *af)
-{
-       af->fence_wptr_start = af->ring->wptr;
-}
-
-static void amdgpu_fence_save_fence_wptr_end(struct amdgpu_fence *af)
-{
-       af->fence_wptr_end = af->ring->wptr;
-}
-
 /**
  * amdgpu_fence_emit - emit a fence on the requested ring
  *
@@ -126,11 +116,9 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct amdgpu_fence *af,
                       &ring->fence_drv.lock,
                       adev->fence_context + ring->idx, seq);
 
-       amdgpu_fence_save_fence_wptr_start(af);
        amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
                               seq, flags | AMDGPU_FENCE_FLAG_INT);
-       amdgpu_fence_save_fence_wptr_end(af);
-       amdgpu_fence_save_wptr(af);
+
        pm_runtime_get_noresume(adev_to_drm(adev)->dev);
        ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
        if (unlikely(rcu_dereference_protected(*ptr, 1))) {
@@ -241,7 +229,6 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
 
        do {
                struct dma_fence *fence, **ptr;
-               struct amdgpu_fence *am_fence;
 
                ++last_seq;
                last_seq &= drv->num_fences_mask;
@@ -254,12 +241,6 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring)
                if (!fence)
                        continue;
 
-               /* Save the wptr in the fence driver so we know what the last processed
-                * wptr was.  This is required for re-emitting the ring state for
-                * queues that are reset but are not guilty and thus have no guilty fence.
-                */
-               am_fence = container_of(fence, struct amdgpu_fence, base);
-               drv->signalled_wptr = am_fence->wptr;
                dma_fence_signal(fence);
                dma_fence_put(fence);
                pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
@@ -727,25 +708,28 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring,
  */
 
 /**
- * amdgpu_fence_driver_update_timedout_fence_state - Update fence state and set errors
+ * amdgpu_ring_set_fence_errors_and_reemit - Set dma_fence errors and reemit
  *
- * @af: fence of the ring to update
+ * @guilty_fence: fence of the ring to update
  *
  */
-void amdgpu_fence_driver_update_timedout_fence_state(struct amdgpu_fence *af)
+void amdgpu_ring_set_fence_errors_and_reemit(struct amdgpu_ring *ring,
+                                            struct amdgpu_fence *guilty_fence)
 {
        struct dma_fence *unprocessed;
        struct dma_fence __rcu **ptr;
        struct amdgpu_fence *fence;
-       struct amdgpu_ring *ring = af->ring;
        unsigned long flags;
        u32 seq, last_seq;
-       bool reemitted = false;
+       unsigned int i;
+       bool is_guilty_fence;
+       bool is_guilty_context;
 
        last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask;
        seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;
 
-       /* mark all fences from the guilty context with an error */
+       ring->reemit = true;
+       amdgpu_ring_alloc(ring, ring->ring_backup_entries_to_copy);
        spin_lock_irqsave(&ring->fence_drv.lock, flags);
        do {
                last_seq++;
@@ -757,39 +741,53 @@ void amdgpu_fence_driver_update_timedout_fence_state(struct amdgpu_fence *af)
 
                if (unprocessed && !dma_fence_is_signaled_locked(unprocessed)) {
                        fence = container_of(unprocessed, struct amdgpu_fence, base);
+                       is_guilty_fence = fence == guilty_fence;
+                       is_guilty_context = fence->context == guilty_fence->context;
 
-                       if (fence->reemitted > 1)
-                               reemitted = true;
-                       else if (fence == af)
+                       /* mark all fences from the guilty context with an error */
+                       if (is_guilty_fence)
                                dma_fence_set_error(&fence->base, -ETIME);
-                       else if (fence->context == af->context)
+                       else if (is_guilty_context)
                                dma_fence_set_error(&fence->base, -ECANCELED);
+
+                       /* Non-vm fence has all the state. */
+                       if (!fence->is_vm_fence) {
+                               /* reemit the packet stream and update wptrs */
+                               fence->ib_wptr = ring->wptr;
+                               for (i = 0; i < fence->ib_dw_size; i++) {
+                                       /* Skip the IB(s) for the guilty context. */
+                                       if (is_guilty_fence && fence->vm_fence &&
+                                           dma_fence_is_signaled_locked(&fence->vm_fence->base) &&
+                                           i >= fence->skip_vm_dw_start_offset &&
+                                           i < fence->skip_vm_dw_end_offset)
+                                               amdgpu_ring_write(ring, ring->funcs->nop);
+                                       else if (is_guilty_context &&
+                                                i >= fence->skip_ib_dw_start_offset &&
+                                                i < fence->skip_ib_dw_end_offset)
+                                               amdgpu_ring_write(ring, ring->funcs->nop);
+                                       else
+                                               amdgpu_ring_write(ring,
+                                                                 ring->ring_backup[fence->backup_idx + i]);
+                               }
+                       }
                }
                rcu_read_unlock();
        } while (last_seq != seq);
        spin_unlock_irqrestore(&ring->fence_drv.lock, flags);
-
-       if (reemitted) {
-               /* if we've already reemitted once then just cancel everything */
-               amdgpu_fence_driver_force_completion(af->ring, &af->base);
-               af->ring->ring_backup_entries_to_copy = 0;
-       }
-}
-
-void amdgpu_fence_save_wptr(struct amdgpu_fence *af)
-{
-       af->wptr = af->ring->wptr;
+       amdgpu_ring_commit(ring);
+       ring->reemit = false;
 }
 
 static void amdgpu_ring_backup_unprocessed_command(struct amdgpu_ring *ring,
-                                                  u64 start_wptr, u64 end_wptr)
+                                                  struct amdgpu_fence *af)
 {
-       unsigned int first_idx = start_wptr & ring->buf_mask;
-       unsigned int last_idx = end_wptr & ring->buf_mask;
+       unsigned int first_idx = af->ib_wptr & ring->buf_mask;
+       unsigned int dw_size = af->ib_dw_size;
        unsigned int i;
 
+       af->backup_idx = ring->ring_backup_entries_to_copy;
        /* Backup the contents of the ring buffer. */
-       for (i = first_idx; i != last_idx; ++i, i &= ring->buf_mask)
+       for (i = first_idx; dw_size > 0; ++i, i &= ring->buf_mask, --dw_size)
                ring->ring_backup[ring->ring_backup_entries_to_copy++] = ring->ring[i];
 }
 
@@ -799,12 +797,10 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
        struct dma_fence *unprocessed;
        struct dma_fence __rcu **ptr;
        struct amdgpu_fence *fence;
-       u64 wptr;
        u32 seq, last_seq;
 
        last_seq = amdgpu_fence_read(ring) & ring->fence_drv.num_fences_mask;
        seq = ring->fence_drv.sync_seq & ring->fence_drv.num_fences_mask;
-       wptr = ring->fence_drv.signalled_wptr;
        ring->ring_backup_entries_to_copy = 0;
 
        do {
@@ -818,21 +814,9 @@ void amdgpu_ring_backup_unprocessed_commands(struct amdgpu_ring *ring,
                if (unprocessed && !dma_fence_is_signaled(unprocessed)) {
                        fence = container_of(unprocessed, struct amdgpu_fence, base);
 
-                       /* save everything if the ring is not guilty, otherwise
-                        * just save the content from other contexts.
-                        */
-                       if (!fence->reemitted &&
-                           (!guilty_fence || (fence->context != guilty_fence->context))) {
-                               amdgpu_ring_backup_unprocessed_command(ring, wptr,
-                                                                      fence->wptr);
-                       } else if (!fence->reemitted) {
-                               /* always save the fence */
-                               amdgpu_ring_backup_unprocessed_command(ring,
-                                                                      fence->fence_wptr_start,
-                                                                      fence->fence_wptr_end);
-                       }
-                       wptr = fence->wptr;
-                       fence->reemitted++;
+                       /* Non-vm fence has all the state. */
+                       if (!fence->is_vm_fence)
+                               amdgpu_ring_backup_unprocessed_command(ring, fence);
                }
                rcu_read_unlock();
        } while (last_seq != seq);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 78987ecdfe03a..3e031e9e73e64 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -135,7 +135,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, struct amdgpu_job *job,
        bool need_pipe_sync = false;
        unsigned int cond_exec;
        unsigned int i;
-       int r = 0;
+       int r = 0, count_dw;
 
        if (!job)
                return -EINVAL;
@@ -178,6 +178,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, struct amdgpu_job *job,
                dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
                return r;
        }
+       af->ib_wptr = ring->wptr;
+       count_dw = ring->count_dw;
 
        need_ctx_switch = ring->current_ctx != fence_ctx;
        if (ring->funcs->emit_pipeline_sync && job &&
@@ -202,11 +204,15 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, struct amdgpu_job *job,
        if (ring->funcs->insert_start)
                ring->funcs->insert_start(ring);
 
+       /* Skip the VM for guilty contexts */
+       af->skip_vm_dw_start_offset = count_dw - ring->count_dw;
        r = amdgpu_vm_flush(ring, job, need_pipe_sync);
        if (r) {
                amdgpu_ring_undo(ring);
                return r;
        }
+       /* Skip the IB for guilty contexts */
+       af->skip_vm_dw_end_offset = count_dw - ring->count_dw;
 
        amdgpu_ring_ib_begin(ring);
 
@@ -218,6 +224,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, struct amdgpu_job *job,
                cond_exec = amdgpu_ring_init_cond_exec(ring,
                                                       ring->cond_exe_gpu_addr);
 
+       /* Skip the IB for guilty contexts */
+       af->skip_ib_dw_start_offset = count_dw - ring->count_dw;
        amdgpu_device_flush_hdp(adev, ring);
 
        if (need_ctx_switch)
@@ -256,6 +264,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, struct amdgpu_job *job,
                amdgpu_ring_emit_frame_cntl(ring, false, secure);
 
        amdgpu_device_invalidate_hdp(adev, ring);
+       /* Skip the IB for guilty contexts */
+       af->skip_ib_dw_end_offset = count_dw - ring->count_dw;
 
        if (ib->flags & AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE)
                fence_flags |= AMDGPU_FENCE_FLAG_TC_WB_ONLY;
@@ -296,13 +306,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, struct amdgpu_job *job,
            ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
                ring->funcs->emit_wave_limit(ring, false);
 
-       /* Save the wptr associated with this fence.
-        * This must be last for resets to work properly
-        * as we need to save the wptr associated with this
-        * fence so we know what rings contents to backup
-        * after we reset the queue.
-        */
-       amdgpu_fence_save_wptr(af);
+       af->ib_dw_size = count_dw - ring->count_dw;
 
        amdgpu_ring_ib_end(ring);
        amdgpu_ring_commit(ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 129ad51386535..83750ab4e81b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -90,10 +90,13 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw)
        ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
 
        /* Make sure we aren't trying to allocate more space
-        * than the maximum for one submission
+        * than the maximum for one submission.  Skip for reemit
+        * since we may be reemitting several submissions.
         */
-       if (WARN_ON_ONCE(ndw > ring->max_dw))
-               return -ENOMEM;
+       if (!ring->reemit) {
+               if (WARN_ON_ONCE(ndw > ring->max_dw))
+                       return -ENOMEM;
+       }
 
        ring->count_dw = ndw;
        ring->wptr_old = ring->wptr;
@@ -104,29 +107,6 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw)
        return 0;
 }
 
-/**
- * amdgpu_ring_alloc_reemit - allocate space on the ring buffer for reemit
- *
- * @ring: amdgpu_ring structure holding ring information
- * @ndw: number of dwords to allocate in the ring buffer
- *
- * Allocate @ndw dwords in the ring buffer (all asics).
- * doesn't check the max_dw limit as we may be reemitting
- * several submissions.
- */
-static void amdgpu_ring_alloc_reemit(struct amdgpu_ring *ring, unsigned int ndw)
-{
-       /* Align requested size with padding so unlock_commit can
-        * pad safely */
-       ndw = (ndw + ring->funcs->align_mask) & ~ring->funcs->align_mask;
-
-       ring->count_dw = ndw;
-       ring->wptr_old = ring->wptr;
-
-       if (ring->funcs->begin_use)
-               ring->funcs->begin_use(ring);
-}
-
 /**
  * amdgpu_ring_insert_nop - insert NOP packets
  *
@@ -875,7 +855,6 @@ void amdgpu_ring_reset_helper_begin(struct amdgpu_ring *ring,
 int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring,
                                 struct amdgpu_fence *guilty_fence)
 {
-       unsigned int i;
        int r;
 
        /* verify that the ring is functional */
@@ -883,16 +862,9 @@ int amdgpu_ring_reset_helper_end(struct amdgpu_ring *ring,
        if (r)
                return r;
 
-       /* set an error on all fences from the context */
-       if (guilty_fence)
-               amdgpu_fence_driver_update_timedout_fence_state(guilty_fence);
-       /* Re-emit the non-guilty commands */
-       if (ring->ring_backup_entries_to_copy) {
-               amdgpu_ring_alloc_reemit(ring, ring->ring_backup_entries_to_copy);
-               for (i = 0; i < ring->ring_backup_entries_to_copy; i++)
-                       amdgpu_ring_write(ring, ring->ring_backup[i]);
-               amdgpu_ring_commit(ring);
-       }
+       /* set an error on all fences from the context and reemit */
+       amdgpu_ring_set_fence_errors_and_reemit(ring, guilty_fence);
+
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index ce095427611fb..c6f674c457620 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -121,7 +121,6 @@ struct amdgpu_fence_driver {
        /* sync_seq is protected by ring emission lock */
        uint32_t                        sync_seq;
        atomic_t                        last_seq;
-       u64                             signalled_wptr;
        bool                            initialized;
        struct amdgpu_irq_src           *irq_src;
        unsigned                        irq_type;
@@ -146,15 +145,19 @@ struct amdgpu_fence {
        struct amdgpu_ring              *ring;
        ktime_t                         start_timestamp;
 
-       /* wptr for the total submission for resets */
-       u64                             wptr;
+       bool                            is_vm_fence;
+       /* location and size of the IB */
+       u64                             ib_wptr;
+       unsigned int                    ib_dw_size;
+       unsigned int                    skip_vm_dw_start_offset;
+       unsigned int                    skip_vm_dw_end_offset;
+       unsigned int                    skip_ib_dw_start_offset;
+       unsigned int                    skip_ib_dw_end_offset;
        /* fence context for resets */
        u64                             context;
-       /* has this fence been reemitted */
-       unsigned int                    reemitted;
-       /* wptr for the fence for the submission */
-       u64                             fence_wptr_start;
-       u64                             fence_wptr_end;
+       /* idx for ring backups */
+       unsigned int                    backup_idx;
+       struct amdgpu_fence             *vm_fence;
 };
 
 extern const struct drm_sched_backend_ops amdgpu_sched_ops;
@@ -162,8 +165,8 @@ extern const struct drm_sched_backend_ops amdgpu_sched_ops;
 void amdgpu_fence_driver_set_error(struct amdgpu_ring *ring, int error);
 void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring,
                                          struct dma_fence *timedout_fence);
-void amdgpu_fence_driver_update_timedout_fence_state(struct amdgpu_fence *af);
-void amdgpu_fence_save_wptr(struct amdgpu_fence *af);
+void amdgpu_ring_set_fence_errors_and_reemit(struct amdgpu_ring *ring,
+                                            struct amdgpu_fence *guilty_fence);
 
 int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
 int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
@@ -314,6 +317,7 @@ struct amdgpu_ring {
        /* backups for resets */
        uint32_t                *ring_backup;
        unsigned int            ring_backup_entries_to_copy;
+       bool                    reemit;
        unsigned                rptr_offs;
        u64                     rptr_gpu_addr;
        u32                     *rptr_cpu_addr;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 6a2ea200d90c8..b7518eb4f05c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -848,6 +848,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
                r = amdgpu_fence_emit(ring, job->hw_vm_fence, 0);
                if (r)
                        return r;
+               job->hw_vm_fence->is_vm_fence = true;
+               job->hw_fence->vm_fence = job->hw_vm_fence;
                fence = &job->hw_vm_fence->base;
                /* get a ref for the job */
                dma_fence_get(fence);
-- 
2.52.0
