Re: [Intel-gfx] [PATCH 04/57] drm/i915: Protect against request freeing during cancellation on wedging
Chris Wilson writes: > As soon as we mark a request as completed, it may be retired. So when > cancelling a request and marking it complete, make sure we first keep a > reference to the request. > > Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala > --- > .../drm/i915/gt/intel_execlists_submission.c | 19 +++ > drivers/gpu/drm/i915/gt/intel_reset.c | 15 ++- > .../gpu/drm/i915/gt/intel_ring_submission.c | 2 +- > drivers/gpu/drm/i915/gt/mock_engine.c | 8 +--- > drivers/gpu/drm/i915/i915_request.c | 9 +++-- > drivers/gpu/drm/i915/i915_request.h | 2 +- > 6 files changed, 31 insertions(+), 24 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > index e7593df6777d..45a8ac152b88 100644 > --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c > @@ -2976,7 +2976,7 @@ static void execlists_reset_cancel(struct > intel_engine_cs *engine) > > /* Mark all executing requests as skipped. */ > list_for_each_entry(rq, >active.requests, sched.link) > - i915_request_mark_eio(rq); > + i915_request_put(i915_request_mark_eio(rq)); > intel_engine_signal_breadcrumbs(engine); > > /* Flush the queued requests to the timeline list (for retiring). 
*/ > @@ -2984,8 +2984,10 @@ static void execlists_reset_cancel(struct > intel_engine_cs *engine) > struct i915_priolist *p = to_priolist(rb); > > priolist_for_each_request_consume(rq, rn, p) { > - i915_request_mark_eio(rq); > - __i915_request_submit(rq); > + if (i915_request_mark_eio(rq)) { > + __i915_request_submit(rq); > + i915_request_put(rq); > + } > } > > rb_erase_cached(>node, >queue); > @@ -2994,7 +2996,7 @@ static void execlists_reset_cancel(struct > intel_engine_cs *engine) > > /* On-hold requests will be flushed to timeline upon their release */ > list_for_each_entry(rq, >active.hold, sched.link) > - i915_request_mark_eio(rq); > + i915_request_put(i915_request_mark_eio(rq)); > > /* Cancel all attached virtual engines */ > while ((rb = rb_first_cached(>virtual))) { > @@ -3007,10 +3009,11 @@ static void execlists_reset_cancel(struct > intel_engine_cs *engine) > spin_lock(>base.active.lock); > rq = fetch_and_zero(>request); > if (rq) { > - i915_request_mark_eio(rq); > - > - rq->engine = engine; > - __i915_request_submit(rq); > + if (i915_request_mark_eio(rq)) { > + rq->engine = engine; > + __i915_request_submit(rq); > + i915_request_put(rq); > + } > i915_request_put(rq); > > ve->base.execlists.queue_priority_hint = INT_MIN; > diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c > b/drivers/gpu/drm/i915/gt/intel_reset.c > index 107430e1e864..a82c4d7b23bc 100644 > --- a/drivers/gpu/drm/i915/gt/intel_reset.c > +++ b/drivers/gpu/drm/i915/gt/intel_reset.c > @@ -786,18 +786,15 @@ static void reset_finish(struct intel_gt *gt, > intel_engine_mask_t awake) > > static void nop_submit_request(struct i915_request *request) > { > - struct intel_engine_cs *engine = request->engine; > - unsigned long flags; > - > RQ_TRACE(request, "-EIO\n"); > - i915_request_set_error_once(request, -EIO); > > - spin_lock_irqsave(>active.lock, flags); > - __i915_request_submit(request); > - i915_request_mark_complete(request); > - spin_unlock_irqrestore(>active.lock, flags); > + request = 
i915_request_mark_eio(request); > + if (request) { > + i915_request_submit(request); > + intel_engine_signal_breadcrumbs(request->engine); > > - intel_engine_signal_breadcrumbs(engine); > + i915_request_put(request); > + } > } > > static void __intel_gt_set_wedged(struct intel_gt *gt) > diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c > b/drivers/gpu/drm/i915/gt/intel_ring_submission.c > index 8b7cc637c432..9c2c605d7a92 100644 > --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c > +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c > @@ -400,7 +400,7 @@ static void reset_cancel(struct intel_engine_cs *engine) > > /* Mark all submitted requests as skipped. */ > list_for_each_entry(request, >active.requests, sched.link) > - i915_request_mark_eio(request); > + i915_request_put(i915_request_mark_eio(request)); > intel_engine_signal_breadcrumbs(engine); > > /*
[Intel-gfx] [PATCH 04/57] drm/i915: Protect against request freeing during cancellation on wedging
As soon as we mark a request as completed, it may be retired. So when cancelling a request and marking it complete, make sure we first keep a reference to the request. Signed-off-by: Chris Wilson --- .../drm/i915/gt/intel_execlists_submission.c | 19 +++ drivers/gpu/drm/i915/gt/intel_reset.c | 15 ++- .../gpu/drm/i915/gt/intel_ring_submission.c | 2 +- drivers/gpu/drm/i915/gt/mock_engine.c | 8 +--- drivers/gpu/drm/i915/i915_request.c | 9 +++-- drivers/gpu/drm/i915/i915_request.h | 2 +- 6 files changed, 31 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index e7593df6777d..45a8ac152b88 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -2976,7 +2976,7 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) /* Mark all executing requests as skipped. */ list_for_each_entry(rq, &engine->active.requests, sched.link) - i915_request_mark_eio(rq); + i915_request_put(i915_request_mark_eio(rq)); intel_engine_signal_breadcrumbs(engine); /* Flush the queued requests to the timeline list (for retiring. 
*/ @@ -2984,8 +2984,10 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) struct i915_priolist *p = to_priolist(rb); priolist_for_each_request_consume(rq, rn, p) { - i915_request_mark_eio(rq); - __i915_request_submit(rq); + if (i915_request_mark_eio(rq)) { + __i915_request_submit(rq); + i915_request_put(rq); + } } rb_erase_cached(&p->node, &execlists->queue); @@ -2994,7 +2996,7 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) /* On-hold requests will be flushed to timeline upon their release */ list_for_each_entry(rq, &engine->active.hold, sched.link) - i915_request_mark_eio(rq); + i915_request_put(i915_request_mark_eio(rq)); /* Cancel all attached virtual engines */ while ((rb = rb_first_cached(&execlists->virtual))) { @@ -3007,10 +3009,11 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine) spin_lock(&ve->base.active.lock); rq = fetch_and_zero(&ve->request); if (rq) { - i915_request_mark_eio(rq); - - rq->engine = engine; - __i915_request_submit(rq); + if (i915_request_mark_eio(rq)) { + rq->engine = engine; + __i915_request_submit(rq); + i915_request_put(rq); + } i915_request_put(rq); ve->base.execlists.queue_priority_hint = INT_MIN; diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 107430e1e864..a82c4d7b23bc 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -786,18 +786,15 @@ static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake) static void nop_submit_request(struct i915_request *request) { - struct intel_engine_cs *engine = request->engine; - unsigned long flags; - RQ_TRACE(request, "-EIO\n"); - i915_request_set_error_once(request, -EIO); - spin_lock_irqsave(&engine->active.lock, flags); - __i915_request_submit(request); - i915_request_mark_complete(request); - spin_unlock_irqrestore(&engine->active.lock, flags); + request = i915_request_mark_eio(request); + if (request) { + i915_request_submit(request); + 
intel_engine_signal_breadcrumbs(request->engine); - intel_engine_signal_breadcrumbs(engine); + i915_request_put(request); + } } static void __intel_gt_set_wedged(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 8b7cc637c432..9c2c605d7a92 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -400,7 +400,7 @@ static void reset_cancel(struct intel_engine_cs *engine) /* Mark all submitted requests as skipped. */ list_for_each_entry(request, &engine->active.requests, sched.link) - i915_request_mark_eio(request); + i915_request_put(i915_request_mark_eio(request)); intel_engine_signal_breadcrumbs(engine); /* Remaining _unready_ requests will be nop'ed when submitted */ diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index