Re: [Intel-gfx] [PATCH 04/57] drm/i915: Protect against request freeing during cancellation on wedging

2021-02-02, Mika Kuoppala
Chris Wilson  writes:

> As soon as we mark a request as completed, it may be retired. So when
> cancelling a request and marking it complete, make sure we first keep a
> reference to the request.
>
> Signed-off-by: Chris Wilson 

Reviewed-by: Mika Kuoppala 
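
For reference, the calling convention this moves us to, as a minimal
sketch (the helper name nop_one_request is made up purely for
illustration; its body just mirrors the new nop_submit_request below):
i915_request_mark_eio() now returns the request with an extra reference
held, or NULL if it was already completed, and the caller drops that
reference only after it has finished touching the request.

static void nop_one_request(struct i915_request *rq)
{
	/*
	 * Mark the request as cancelled with -EIO, taking a reference
	 * before it is marked complete so that a concurrent retire
	 * cannot free it under us. Returns NULL if already completed.
	 */
	rq = i915_request_mark_eio(rq);
	if (!rq)
		return;

	/* Flush the cancelled request onto its timeline for retiring */
	i915_request_submit(rq);
	intel_engine_signal_breadcrumbs(rq->engine);

	/* Drop the temporary reference; rq may now be retired and freed */
	i915_request_put(rq);
}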

> ---
>  .../drm/i915/gt/intel_execlists_submission.c  | 19 +++
>  drivers/gpu/drm/i915/gt/intel_reset.c | 15 ++-
>  .../gpu/drm/i915/gt/intel_ring_submission.c   |  2 +-
>  drivers/gpu/drm/i915/gt/mock_engine.c |  8 +---
>  drivers/gpu/drm/i915/i915_request.c   |  9 +++--
>  drivers/gpu/drm/i915/i915_request.h   |  2 +-
>  6 files changed, 31 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> index e7593df6777d..45a8ac152b88 100644
> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
> @@ -2976,7 +2976,7 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine)
>  
>   /* Mark all executing requests as skipped. */
>   list_for_each_entry(rq, &engine->active.requests, sched.link)
> - i915_request_mark_eio(rq);
> + i915_request_put(i915_request_mark_eio(rq));
>   intel_engine_signal_breadcrumbs(engine);
>  
>   /* Flush the queued requests to the timeline list (for retiring). */
> @@ -2984,8 +2984,10 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine)
>   struct i915_priolist *p = to_priolist(rb);
>  
>   priolist_for_each_request_consume(rq, rn, p) {
> - i915_request_mark_eio(rq);
> - __i915_request_submit(rq);
> + if (i915_request_mark_eio(rq)) {
> + __i915_request_submit(rq);
> + i915_request_put(rq);
> + }
>   }
>  
>   rb_erase_cached(&p->node, &execlists->queue);
> @@ -2994,7 +2996,7 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine)
>  
>   /* On-hold requests will be flushed to timeline upon their release */
>   list_for_each_entry(rq, &engine->active.hold, sched.link)
> - i915_request_mark_eio(rq);
> + i915_request_put(i915_request_mark_eio(rq));
>  
>   /* Cancel all attached virtual engines */
>   while ((rb = rb_first_cached(&execlists->virtual))) {
> @@ -3007,10 +3009,11 @@ static void execlists_reset_cancel(struct intel_engine_cs *engine)
>   spin_lock(&ve->base.active.lock);
>   rq = fetch_and_zero(&ve->request);
>   if (rq) {
> - i915_request_mark_eio(rq);
> -
> - rq->engine = engine;
> - __i915_request_submit(rq);
> + if (i915_request_mark_eio(rq)) {
> + rq->engine = engine;
> + __i915_request_submit(rq);
> + i915_request_put(rq);
> + }
>   i915_request_put(rq);
>  
>   ve->base.execlists.queue_priority_hint = INT_MIN;
> diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
> index 107430e1e864..a82c4d7b23bc 100644
> --- a/drivers/gpu/drm/i915/gt/intel_reset.c
> +++ b/drivers/gpu/drm/i915/gt/intel_reset.c
> @@ -786,18 +786,15 @@ static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake)
>  
>  static void nop_submit_request(struct i915_request *request)
>  {
> - struct intel_engine_cs *engine = request->engine;
> - unsigned long flags;
> -
>   RQ_TRACE(request, "-EIO\n");
> - i915_request_set_error_once(request, -EIO);
>  
> - spin_lock_irqsave(&engine->active.lock, flags);
> - __i915_request_submit(request);
> - i915_request_mark_complete(request);
> - spin_unlock_irqrestore(&engine->active.lock, flags);
> + request = i915_request_mark_eio(request);
> + if (request) {
> + i915_request_submit(request);
> + intel_engine_signal_breadcrumbs(request->engine);
>  
> - intel_engine_signal_breadcrumbs(engine);
> + i915_request_put(request);
> + }
>  }
>  
>  static void __intel_gt_set_wedged(struct intel_gt *gt)
> diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
> index 8b7cc637c432..9c2c605d7a92 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
> @@ -400,7 +400,7 @@ static void reset_cancel(struct intel_engine_cs *engine)
>  
>   /* Mark all submitted requests as skipped. */
>   list_for_each_entry(request, &engine->active.requests, sched.link)
> - i915_request_mark_eio(request);
> + i915_request_put(i915_request_mark_eio(request));
>   intel_engine_signal_breadcrumbs(engine);
>  
>   /* Remaining _unready_ requests will be nop'ed when submitted */
