[Intel-gfx] [PATCH v2] drm/i915/selftests: Exercise potential false lite-restore

2019-10-01 Thread Chris Wilson
If execlists' lite-restore is based on the common GEM context tag
rather than the per-intel_context LRCA, then a context switch between
two intel_contexts on the same engine derived from the same GEM context
will perform a lite-restore instead of a full context switch. We can
exploit this by poisoning the ringbuffer of the first context and trying
to trick the HW into a simple RING_TAIL update (i.e. a lite-restore).

v2: Also check what happens if we preempt ce[0] with ce[1] (both instances
on the same engine from the same parent context) [Tvrtko]

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
Reviewed-by: Tvrtko Ursulin 
---
Fixup GEM_BUG_ON to look at rq->tail only after it is set!
---
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 174 +
 1 file changed, 174 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c 
b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 93f2fcdc49bf..f4d6a1b734ae 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -79,6 +79,178 @@ static int live_sanitycheck(void *arg)
return err;
 }
 
+static int live_unlite_restore(struct drm_i915_private *i915, int prio)
+{
+   struct intel_engine_cs *engine;
+   struct i915_gem_context *ctx;
+   enum intel_engine_id id;
+   intel_wakeref_t wakeref;
+   struct igt_spinner spin;
+   int err = -ENOMEM;
+
+   /*
+* Check that we can correctly context switch between 2 instances
+* on the same engine from the same parent context.
+*/
+
+   mutex_lock(&i915->drm.struct_mutex);
+   wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+
+   if (igt_spinner_init(&spin, &i915->gt))
+   goto err_unlock;
+
+   ctx = kernel_context(i915);
+   if (!ctx)
+   goto err_spin;
+
+   err = 0;
+   for_each_engine(engine, i915, id) {
+   struct intel_context *ce[2] = {};
+   struct i915_request *rq[2];
+   struct igt_live_test t;
+   int n;
+
+   if (prio && !intel_engine_has_preemption(engine))
+   continue;
+
+   if (!intel_engine_can_store_dword(engine))
+   continue;
+
+   if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
+   err = -EIO;
+   break;
+   }
+
+   for (n = 0; n < ARRAY_SIZE(ce); n++) {
+   struct intel_context *tmp;
+
+   tmp = intel_context_create(ctx, engine);
+   if (IS_ERR(tmp)) {
+   err = PTR_ERR(tmp);
+   goto err_ce;
+   }
+
+   err = intel_context_pin(tmp);
+   if (err) {
+   intel_context_put(tmp);
+   goto err_ce;
+   }
+
+   /*
+* Setup the pair of contexts such that if we
+* lite-restore using the RING_TAIL from ce[1] it
+* will execute garbage from ce[0]->ring.
+*/
+   memset(tmp->ring->vaddr,
+  POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
+  tmp->ring->vma->size);
+
+   ce[n] = tmp;
+   }
+   GEM_BUG_ON(!ce[1]->ring->size);
+   intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
+   __execlists_update_reg_state(ce[1], engine);
+
+
+   rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
+   if (IS_ERR(rq[0])) {
+   err = PTR_ERR(rq[0]);
+   goto err_ce;
+   }
+
+   i915_request_get(rq[0]);
+   i915_request_add(rq[0]);
+   GEM_BUG_ON(rq[0]->tail > ce[1]->ring->emit);
+
+   if (!igt_wait_for_spinner(&spin, rq[0])) {
+   i915_request_put(rq[0]);
+   goto err_ce;
+   }
+
+   rq[1] = i915_request_create(ce[1]);
+   if (IS_ERR(rq[1])) {
+   err = PTR_ERR(rq[1]);
+   i915_request_put(rq[0]);
+   goto err_ce;
+   }
+
+   if (!prio) {
+   /*
+* Ensure we do the switch to ce[1] on completion.
+*
+* rq[0] is already submitted, so this should reduce
+* to a no-op (a wait on a request on the same engine
+* uses the submit fence, not the completion fence),
+* but it will install a dependency on rq[1] for rq[0]
+* that will prevent the pair being reordered by
+* timeslicing.
+

Re: [Intel-gfx] [PATCH v2] drm/i915/selftests: Exercise potential false lite-restore

2019-10-01 Thread Chris Wilson
Quoting Tvrtko Ursulin (2019-10-01 13:59:14)
> 
> On 01/10/2019 13:43, Chris Wilson wrote:
> > + tasklet_kill(&engine->execlists.tasklet); /* flush submission */
> 
> Is this really needed, why?

In a pathological case where we are using the tasklet (e.g. preemption
and ksoftirqd active), it would then be possible for the spinner to
complete (thanks to the igt_spinner_end below) before we process the
preemption request (and thus we would never actually perform the
preemption). Now, it may still complete before the HW has a chance to
process the ELSP submit, but that feels less likely. (We would need to
wait on !execlists->pending with a timeout to be sure.)

tasklet_kill() is effectively:

    while tasklet is queued and not run:
        yield();

I think we need only the one flush, as we only really care about the
first available execlists_submit_port that has both ELSP slots filled,
so that we can check for a possible lite-restore between the different
contexts.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH v2] drm/i915/selftests: Exercise potential false lite-restore

2019-10-01 Thread Tvrtko Ursulin


On 01/10/2019 13:43, Chris Wilson wrote:

If execlists's lite-restore is based on the common GEM context tag
rather than the per-intel_context LRCA, then a context switch between
two intel_contexts on the same engine derived from the same GEM context
will perform a lite-restore instead of a full context switch. We can
exploit this by poisoning the ringbuffer of the first context and trying
to trick a simple RING_TAIL update (i.e. lite-restore)

v2: Also check what happens if preempt ce[0] with ce[1] (both instances
on the same engine from the same parent context) [Tvrtko]

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
  drivers/gpu/drm/i915/gt/selftest_lrc.c | 173 +
  1 file changed, 173 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c 
b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 93f2fcdc49bf..de498c38a006 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -79,6 +79,177 @@ static int live_sanitycheck(void *arg)
return err;
  }
  
+static int live_unlite_restore(struct drm_i915_private *i915, int prio)

+{
+   struct intel_engine_cs *engine;
+   struct i915_gem_context *ctx;
+   enum intel_engine_id id;
+   intel_wakeref_t wakeref;
+   struct igt_spinner spin;
+   int err = -ENOMEM;
+
+   /*
+* Check that we can correctly context switch between 2 instances
+* on the same engine from the same parent context.
+*/
+
+   mutex_lock(&i915->drm.struct_mutex);
+   wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+
+   if (igt_spinner_init(&spin, &i915->gt))
+   goto err_unlock;
+
+   ctx = kernel_context(i915);
+   if (!ctx)
+   goto err_spin;
+
+   err = 0;
+   for_each_engine(engine, i915, id) {
+   struct intel_context *ce[2] = {};
+   struct i915_request *rq[2];
+   struct igt_live_test t;
+   int n;
+
+   if (prio && !intel_engine_has_preemption(engine))
+   continue;
+
+   if (!intel_engine_can_store_dword(engine))
+   continue;
+
+   if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
+   err = -EIO;
+   break;
+   }
+
+   for (n = 0; n < ARRAY_SIZE(ce); n++) {
+   struct intel_context *tmp;
+
+   tmp = intel_context_create(ctx, engine);
+   if (IS_ERR(tmp)) {
+   err = PTR_ERR(tmp);
+   goto err_ce;
+   }
+
+   err = intel_context_pin(tmp);
+   if (err) {
+   intel_context_put(tmp);
+   goto err_ce;
+   }
+
+   /*
+* Setup the pair of contexts such that if we
+* lite-restore using the RING_TAIL from ce[1] it
+* will execute garbage from ce[0]->ring.
+*/
+   memset(tmp->ring->vaddr,
+  POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
+  tmp->ring->vma->size);
+
+   ce[n] = tmp;
+   }
+   intel_ring_reset(ce[1]->ring, ce[1]->ring->vma->size / 2);
+   __execlists_update_reg_state(ce[1], engine);
+
+   rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
+   if (IS_ERR(rq[0])) {
+   err = PTR_ERR(rq[0]);
+   goto err_ce;
+   }
+
+   GEM_BUG_ON(rq[0]->tail > ce[1]->ring->emit);
+   i915_request_get(rq[0]);
+   i915_request_add(rq[0]);
+
+   if (!igt_wait_for_spinner(&spin, rq[0])) {
+   i915_request_put(rq[0]);
+   goto err_ce;
+   }
+
+   rq[1] = i915_request_create(ce[1]);
+   if (IS_ERR(rq[1])) {
+   err = PTR_ERR(rq[1]);
+   i915_request_put(rq[0]);
+   goto err_ce;
+   }
+   GEM_BUG_ON(rq[1]->tail <= rq[0]->tail);
+
+   if (!prio) {
+   /*
+* Ensure we do the switch to ce[1] on completion.
+*
+* rq[0] is already submitted, so this should reduce
+* to a no-op (a wait on a request on the same engine
+* uses the submit fence, not the completion fence),
+* but it will install a dependency on rq[1] for rq[0]
+* that will prevent the pair being reordered by
+* timeslicing.
+*/
+

[Intel-gfx] [PATCH v2] drm/i915/selftests: Exercise potential false lite-restore

2019-10-01 Thread Chris Wilson
If execlists' lite-restore is based on the common GEM context tag
rather than the per-intel_context LRCA, then a context switch between
two intel_contexts on the same engine derived from the same GEM context
will perform a lite-restore instead of a full context switch. We can
exploit this by poisoning the ringbuffer of the first context and trying
to trick the HW into a simple RING_TAIL update (i.e. a lite-restore).

v2: Also check what happens if we preempt ce[0] with ce[1] (both instances
on the same engine from the same parent context) [Tvrtko]

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 173 +
 1 file changed, 173 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c 
b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 93f2fcdc49bf..de498c38a006 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -79,6 +79,177 @@ static int live_sanitycheck(void *arg)
return err;
 }
 
+static int live_unlite_restore(struct drm_i915_private *i915, int prio)
+{
+   struct intel_engine_cs *engine;
+   struct i915_gem_context *ctx;
+   enum intel_engine_id id;
+   intel_wakeref_t wakeref;
+   struct igt_spinner spin;
+   int err = -ENOMEM;
+
+   /*
+* Check that we can correctly context switch between 2 instances
+* on the same engine from the same parent context.
+*/
+
+   mutex_lock(&i915->drm.struct_mutex);
+   wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+
+   if (igt_spinner_init(&spin, &i915->gt))
+   goto err_unlock;
+
+   ctx = kernel_context(i915);
+   if (!ctx)
+   goto err_spin;
+
+   err = 0;
+   for_each_engine(engine, i915, id) {
+   struct intel_context *ce[2] = {};
+   struct i915_request *rq[2];
+   struct igt_live_test t;
+   int n;
+
+   if (prio && !intel_engine_has_preemption(engine))
+   continue;
+
+   if (!intel_engine_can_store_dword(engine))
+   continue;
+
+   if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
+   err = -EIO;
+   break;
+   }
+
+   for (n = 0; n < ARRAY_SIZE(ce); n++) {
+   struct intel_context *tmp;
+
+   tmp = intel_context_create(ctx, engine);
+   if (IS_ERR(tmp)) {
+   err = PTR_ERR(tmp);
+   goto err_ce;
+   }
+
+   err = intel_context_pin(tmp);
+   if (err) {
+   intel_context_put(tmp);
+   goto err_ce;
+   }
+
+   /*
+* Setup the pair of contexts such that if we
+* lite-restore using the RING_TAIL from ce[1] it
+* will execute garbage from ce[0]->ring.
+*/
+   memset(tmp->ring->vaddr,
+  POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
+  tmp->ring->vma->size);
+
+   ce[n] = tmp;
+   }
+   intel_ring_reset(ce[1]->ring, ce[1]->ring->vma->size / 2);
+   __execlists_update_reg_state(ce[1], engine);
+
+   rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
+   if (IS_ERR(rq[0])) {
+   err = PTR_ERR(rq[0]);
+   goto err_ce;
+   }
+
+   GEM_BUG_ON(rq[0]->tail > ce[1]->ring->emit);
+   i915_request_get(rq[0]);
+   i915_request_add(rq[0]);
+
+   if (!igt_wait_for_spinner(&spin, rq[0])) {
+   i915_request_put(rq[0]);
+   goto err_ce;
+   }
+
+   rq[1] = i915_request_create(ce[1]);
+   if (IS_ERR(rq[1])) {
+   err = PTR_ERR(rq[1]);
+   i915_request_put(rq[0]);
+   goto err_ce;
+   }
+   GEM_BUG_ON(rq[1]->tail <= rq[0]->tail);
+
+   if (!prio) {
+   /*
+* Ensure we do the switch to ce[1] on completion.
+*
+* rq[0] is already submitted, so this should reduce
+* to a no-op (a wait on a request on the same engine
+* uses the submit fence, not the completion fence),
+* but it will install a dependency on rq[1] for rq[0]
+* that will prevent the pair being reordered by
+* timeslicing.
+*/
+   i915_request_await_dma_fence(rq[1]