[Intel-gfx] [PATCH 14/18] drm/i915/execlists: Force preemption via reset on timeout

2018-04-09 Thread Chris Wilson
Install a timer when trying to preempt on behalf of an important
context such that if the active context does not honour the preemption
request within the desired timeout, then we reset the GPU to allow the
important context to run.

v2: Install the timer on scheduling the preempt request; long before we
even try to inject preemption into the ELSP, as the tasklet/injection
may itself be blocked.
v3: Update the guc to handle the preemption/tasklet timer.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/intel_guc_submission.c |  1 +
 drivers/gpu/drm/i915/intel_lrc.c| 88 +++--
 drivers/gpu/drm/i915/intel_ringbuffer.h |  8 +-
 drivers/gpu/drm/i915/selftests/intel_lrc.c  | 60 ++
 4 files changed, 148 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c 
b/drivers/gpu/drm/i915/intel_guc_submission.c
index 994082712181..5577d6f717e3 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -750,6 +750,7 @@ static void guc_dequeue(struct intel_engine_cs *engine)
kmem_cache_free(engine->i915->priorities, p);
}
 done:
+   execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT_TIMEOUT);
execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
execlists->first = rb;
if (submit) {
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 73fb941a675e..ca1c54af2877 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -550,6 +550,52 @@ static void inject_preempt_context(struct intel_engine_cs 
*engine)
execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
 
+static enum hrtimer_restart preempt_timeout(struct hrtimer *hrtimer)
+{
+   struct intel_engine_execlists *execlists =
+   container_of(hrtimer, typeof(*execlists), preempt_timer);
+
+   GEM_TRACE("%s\n",
+ container_of(execlists,
+  struct intel_engine_cs,
+  execlists)->name);
+
+   if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT_TIMEOUT))
+   return HRTIMER_NORESTART;
+
+   if (GEM_SHOW_DEBUG()) {
+   struct intel_engine_cs *engine =
+   container_of(execlists, typeof(*engine), execlists);
+   struct drm_printer p = drm_debug_printer(__func__);
+
+   intel_engine_dump(engine, &p, "%s\n", engine->name);
+   }
+
+   queue_work(system_highpri_wq, &execlists->preempt_reset);
+
+   return HRTIMER_NORESTART;
+}
+
+static void preempt_reset(struct work_struct *work)
+{
+   struct intel_engine_execlists *execlists =
+   container_of(work, typeof(*execlists), preempt_reset);
+   struct intel_engine_cs *engine =
+ container_of(execlists, struct intel_engine_cs, execlists);
+
+   GEM_TRACE("%s\n", engine->name);
+
+   tasklet_disable(&execlists->tasklet);
+
+   execlists->tasklet.func(execlists->tasklet.data);
+
+   if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT_TIMEOUT))
+   i915_handle_error(engine->i915, BIT(engine->id), 0,
+ "preemption time out on %s", engine->name);
+
+   tasklet_enable(&execlists->tasklet);
+}
+
 static void complete_preempt_context(struct intel_engine_execlists *execlists)
 {
GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
@@ -724,6 +770,7 @@ static void execlists_dequeue(struct intel_engine_cs 
*engine)
kmem_cache_free(engine->i915->priorities, p);
}
 done:
+   execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT_TIMEOUT);
execlists->queue_priority =
port != execlists->port ? rq_prio(last) : INT_MIN;
execlists->first = rb;
@@ -1109,16 +1156,38 @@ static void queue_request(struct intel_engine_cs 
*engine,
 list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests);
 }
 
-static void __submit_queue(struct intel_engine_cs *engine, int prio)
+static void __submit_queue(struct intel_engine_cs *engine,
+  int prio, unsigned int timeout)
 {
-   engine->execlists.queue_priority = prio;
-   tasklet_hi_schedule(&engine->execlists.tasklet);
+   struct intel_engine_execlists * const execlists = &engine->execlists;
+   int old = execlists->queue_priority;
+
+   GEM_TRACE("%s prio=%d (previous=%d)\n", engine->name, prio, old);
+
+   if (unlikely(execlists_is_active(execlists,
+EXECLISTS_ACTIVE_PREEMPT_TIMEOUT)))
+   hrtimer_cancel(&execlists->preempt_timer);
+
+   execlists->queue_priority = prio;
+   tasklet_hi_schedule(&execlists->tasklet);
+
+   /* Set a timer to force preemption vs hostile userspace */
+   if (timeout && __execlists_need_preempt(prio, old)) {
+   GEM_TRACE("%s 

[Intel-gfx] [PATCH 14/18] drm/i915/execlists: Force preemption via reset on timeout

2018-03-30 Thread Chris Wilson
Install a timer when trying to preempt on behalf of an important
context such that if the active context does not honour the preemption
request within the desired timeout, then we reset the GPU to allow the
important context to run.

v2: Install the timer on scheduling the preempt request; long before we
even try to inject preemption into the ELSP, as the tasklet/injection
may itself be blocked.
v3: Update the guc to handle the preemption/tasklet timer.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/intel_guc_submission.c |  4 ++
 drivers/gpu/drm/i915/intel_lrc.c| 82 ++---
 drivers/gpu/drm/i915/intel_ringbuffer.h |  8 ++-
 drivers/gpu/drm/i915/selftests/intel_lrc.c  | 66 +++
 4 files changed, 151 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c 
b/drivers/gpu/drm/i915/intel_guc_submission.c
index 0e0655430480..d10068579285 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -627,6 +627,9 @@ static void complete_preempt_context(struct intel_engine_cs 
*engine)
 
GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
 
+   execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT_TIMEOUT);
+   hrtimer_try_to_cancel(&execlists->preempt_timer);
+
execlists_cancel_port_requests(execlists);
execlists_unwind_incomplete_requests(execlists);
 
@@ -739,6 +742,7 @@ static void guc_dequeue(struct intel_engine_cs *engine)
kmem_cache_free(engine->i915->priorities, p);
}
 done:
+   execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT_TIMEOUT);
execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
execlists->first = rb;
if (submit) {
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 731f8de56ea0..bee8a58f340f 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -549,10 +549,50 @@ static void inject_preempt_context(struct intel_engine_cs 
*engine)
execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
 
+static enum hrtimer_restart preempt_timeout(struct hrtimer *hrtimer)
+{
+   struct intel_engine_execlists *execlists =
+   container_of(hrtimer, typeof(*execlists), preempt_timer);
+
+   GEM_TRACE("%s\n",
+ container_of(execlists,
+  struct intel_engine_cs,
+  execlists)->name);
+
+   if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT_TIMEOUT))
+   return HRTIMER_NORESTART;
+
+   queue_work(system_highpri_wq, &execlists->preempt_reset);
+   return HRTIMER_NORESTART;
+}
+
+static void preempt_reset(struct work_struct *work)
+{
+   struct intel_engine_execlists *execlists =
+   container_of(work, typeof(*execlists), preempt_reset);
+   struct intel_engine_cs *engine =
+ container_of(execlists, struct intel_engine_cs, execlists);
+
+   GEM_TRACE("%s\n", engine->name);
+
+   tasklet_disable(&execlists->tasklet);
+
+   execlists->tasklet.func(execlists->tasklet.data);
+
+   if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT_TIMEOUT))
+   i915_handle_error(engine->i915, BIT(engine->id), 0,
+ "preemption time out on %s", engine->name);
+
+   tasklet_enable(&execlists->tasklet);
+}
+
 static void complete_preempt_context(struct intel_engine_execlists *execlists)
 {
GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT));
 
+   execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT_TIMEOUT);
+   hrtimer_try_to_cancel(&execlists->preempt_timer);
+
execlists_cancel_port_requests(execlists);
execlists_unwind_incomplete_requests(execlists);
 
@@ -722,6 +762,7 @@ static void execlists_dequeue(struct intel_engine_cs 
*engine)
kmem_cache_free(engine->i915->priorities, p);
}
 done:
+   execlists_clear_active(execlists, EXECLISTS_ACTIVE_PREEMPT_TIMEOUT);
execlists->queue_priority =
port != execlists->port ? rq_prio(last) : INT_MIN;
execlists->first = rb;
@@ -1099,16 +1140,38 @@ static void queue_request(struct intel_engine_cs 
*engine,
 list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests);
 }
 
-static void __submit_queue(struct intel_engine_cs *engine, int prio)
+static void __submit_queue(struct intel_engine_cs *engine,
+  int prio, unsigned int timeout)
 {
-   engine->execlists.queue_priority = prio;
-   tasklet_hi_schedule(&engine->execlists.tasklet);
+   struct intel_engine_execlists * const execlists = &engine->execlists;
+   int old = execlists->queue_priority;
+
+   GEM_TRACE("%s prio=%d (previous=%d)\n", engine->name, prio, old);
+
+   if (unlikely(execlists_is_active(execlists,
+