[Intel-gfx] [PATCH 32/43] drm/i915/bdw: Avoid non-lite-restore preemptions

2014-07-24 Thread Thomas Daniel
From: Oscar Mateo 

In the current Execlists feeding mechanism, full preemption is not
supported yet: only lite-restores are allowed (that is, the GPU
simply samples a new tail pointer for the context currently in
execution).

But we have identified a scenario in which a full preemption occurs:
1) We submit two contexts for execution (A & B).
2) The GPU finishes with the first one (A), switches to the second one
(B) and informs us.
3) We submit B again (hoping to cause a lite restore) together with C,
but in the time we spend writing to the ELSP, the GPU finishes B.
4) The GPU starts executing B again (since we told it so).
5) We receive a B finished interrupt and, mistakenly, we submit C (again)
and D, causing a full preemption of B.

The race is avoided by keeping track of how many times a context has been
submitted to the hardware and by better discriminating the received context
switch interrupts: in the example, when we have submitted B twice, we won't
submit C and D as soon as we receive the notification that B is completed
because we were expecting to get a LITE_RESTORE and we didn't, so we know a
second completion will be received shortly.

Without this explicit checking, somehow, the batch buffer execution order
gets messed up. This can be verified with the IGT test I sent together with
the series. I don't know the exact mechanism by which the pre-emption messes
with the execution order but, since other people are working on the Scheduler
+ Preemption on Execlists, I didn't try to fix it. In this series, only Lite
Restores are supported (other kinds of preemption WARN).

v2: elsp_submitted belongs in the new intel_ctx_submit_request. Several
rebase changes.

v3: Clarify how the race is avoided, as requested by Daniel.

Signed-off-by: Oscar Mateo 
---
 drivers/gpu/drm/i915/intel_lrc.c |   28 
 drivers/gpu/drm/i915/intel_lrc.h |2 ++
 2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 65f4f26..895dbfc 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -264,6 +264,7 @@ static void execlists_context_unqueue(struct 
intel_engine_cs *ring)
else if (req0->ctx == cursor->ctx) {
/* Same ctx: ignore first request, as second request
 * will update tail past first request's workload */
+   cursor->elsp_submitted = req0->elsp_submitted;
list_del(&req0->execlist_link);
queue_work(dev_priv->wq, &req0->work);
req0 = cursor;
@@ -273,8 +274,14 @@ static void execlists_context_unqueue(struct 
intel_engine_cs *ring)
}
}
 
+   WARN_ON(req1 && req1->elsp_submitted);
+
BUG_ON(execlists_submit_context(ring, req0->ctx, req0->tail,
req1? req1->ctx : NULL, req1? req1->tail : 0));
+
+   req0->elsp_submitted++;
+   if (req1)
+   req1->elsp_submitted++;
 }
 
 static bool execlists_check_remove_request(struct intel_engine_cs *ring,
@@ -291,9 +298,13 @@ static bool execlists_check_remove_request(struct 
intel_engine_cs *ring,
struct drm_i915_gem_object *ctx_obj =
head_req->ctx->engine[ring->id].state;
if (intel_execlists_ctx_id(ctx_obj) == request_id) {
-   list_del(&head_req->execlist_link);
-   queue_work(dev_priv->wq, &head_req->work);
-   return true;
+   WARN(head_req->elsp_submitted == 0,
+   "Never submitted head request\n");
+   if (--head_req->elsp_submitted <= 0) {
+   list_del(&head_req->execlist_link);
+   queue_work(dev_priv->wq, &head_req->work);
+   return true;
+   }
}
}
 
@@ -326,7 +337,16 @@ void intel_execlists_handle_ctx_events(struct 
intel_engine_cs *ring)
status_id = I915_READ(RING_CONTEXT_STATUS_BUF(ring) +
(read_pointer % 6) * 8 + 4);
 
-   if (status & GEN8_CTX_STATUS_COMPLETE) {
+   if (status & GEN8_CTX_STATUS_PREEMPTED) {
+   if (status & GEN8_CTX_STATUS_LITE_RESTORE) {
+   if (execlists_check_remove_request(ring, 
status_id))
+   WARN(1, "Lite Restored request removed 
from queue\n");
+   } else
+   WARN(1, "Preemption without Lite Restore\n");
+   }
+
+if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) ||
+(status & GEN8_CTX_STATUS_ELEMENT_SWITCH)) {
if (execlists_check_remove_request(ring, status_id))
  

Re: [Intel-gfx] [PATCH 32/43] drm/i915/bdw: Avoid non-lite-restore preemptions

2014-08-14 Thread Daniel Vetter
On Thu, Jul 24, 2014 at 05:04:40PM +0100, Thomas Daniel wrote:
> From: Oscar Mateo 
> 
> In the current Execlists feeding mechanism, full preemption is not
> supported yet: only lite-restores are allowed (this is: the GPU
> simply samples a new tail pointer for the context currently in
> execution).
> 
> But we have identified an scenario in which a full preemption occurs:
> 1) We submit two contexts for execution (A & B).
> 2) The GPU finishes with the first one (A), switches to the second one
> (B) and informs us.
> 3) We submit B again (hoping to cause a lite restore) together with C,
> but in the time we spend writing to the ELSP, the GPU finishes B.
> 4) The GPU start executing B again (since we told it so).
> 5) We receive a B finished interrupt and, mistakenly, we submit C (again)
> and D, causing a full preemption of B.
> 
> The race is avoided by keeping track of how many times a context has been
> submitted to the hardware and by better discriminating the received context
> switch interrupts: in the example, when we have submitted B twice, we won´t
> submit C and D as soon as we receive the notification that B is completed
> because we were expecting to get a LITE_RESTORE and we didn´t, so we know a
> second completion will be received shortly.
> 
> Without this explicit checking, somehow, the batch buffer execution order
> gets messed with. This can be verified with the IGT test I sent together with
> the series. I don´t know the exact mechanism by which the pre-emption messes
> with the execution order but, since other people is working on the Scheduler
> + Preemption on Execlists, I didn´t try to fix it. In these series, only Lite
> Restores are supported (other kind of preemptions WARN).

Where's this igt patch? The kernel patch here is at least missing the

Testcase: igt/foo

tag. Please supply.
-Daniel

> 
> v2: elsp_submitted belongs in the new intel_ctx_submit_request. Several
> rebase changes.
> 
> v3: Clarify how the race is avoided, as requested by Daniel.
> 
> Signed-off-by: Oscar Mateo 
> ---
>  drivers/gpu/drm/i915/intel_lrc.c |   28 
>  drivers/gpu/drm/i915/intel_lrc.h |2 ++
>  2 files changed, 26 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
> b/drivers/gpu/drm/i915/intel_lrc.c
> index 65f4f26..895dbfc 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -264,6 +264,7 @@ static void execlists_context_unqueue(struct 
> intel_engine_cs *ring)
>   else if (req0->ctx == cursor->ctx) {
>   /* Same ctx: ignore first request, as second request
>* will update tail past first request's workload */
> + cursor->elsp_submitted = req0->elsp_submitted;
>   list_del(&req0->execlist_link);
>   queue_work(dev_priv->wq, &req0->work);
>   req0 = cursor;
> @@ -273,8 +274,14 @@ static void execlists_context_unqueue(struct 
> intel_engine_cs *ring)
>   }
>   }
>  
> + WARN_ON(req1 && req1->elsp_submitted);
> +
>   BUG_ON(execlists_submit_context(ring, req0->ctx, req0->tail,
>   req1? req1->ctx : NULL, req1? req1->tail : 0));
> +
> + req0->elsp_submitted++;
> + if (req1)
> + req1->elsp_submitted++;
>  }
>  
>  static bool execlists_check_remove_request(struct intel_engine_cs *ring,
> @@ -291,9 +298,13 @@ static bool execlists_check_remove_request(struct 
> intel_engine_cs *ring,
>   struct drm_i915_gem_object *ctx_obj =
>   head_req->ctx->engine[ring->id].state;
>   if (intel_execlists_ctx_id(ctx_obj) == request_id) {
> - list_del(&head_req->execlist_link);
> - queue_work(dev_priv->wq, &head_req->work);
> - return true;
> + WARN(head_req->elsp_submitted == 0,
> + "Never submitted head request\n");
> + if (--head_req->elsp_submitted <= 0) {
> + list_del(&head_req->execlist_link);
> + queue_work(dev_priv->wq, &head_req->work);
> + return true;
> + }
>   }
>   }
>  
> @@ -326,7 +337,16 @@ void intel_execlists_handle_ctx_events(struct 
> intel_engine_cs *ring)
>   status_id = I915_READ(RING_CONTEXT_STATUS_BUF(ring) +
>   (read_pointer % 6) * 8 + 4);
>  
> - if (status & GEN8_CTX_STATUS_COMPLETE) {
> + if (status & GEN8_CTX_STATUS_PREEMPTED) {
> + if (status & GEN8_CTX_STATUS_LITE_RESTORE) {
> + if (execlists_check_remove_request(ring, 
> status_id))
> + WARN(1, "Lite Restored request removed 
> from queue\n");
> + } else
> +