Not too long ago, we realised we had issues with a rolling back a
context so far for a preemption request we considered the resubmit not
to be a rollback but a forward roll. This means we would issue a lite
restore instead of forcing a full restore, continuing execution of the
old requests rather than causing a preemption. Add a selftest to
exercise such a far rollback, such that if we were to skip the full
restore, we would execute invalid instructions in the ring and hang.

Note that while I was able to confirm that this causes us to do a
lite-restore preemption rollback (with commit e36ba817fa96 ("drm/i915/gt:
Incrementally check for rewinding") disabled), it did not trick the HW
into rolling past the old RING_TAIL. Myybe on other HW.

References: e36ba817fa96 ("drm/i915/gt: Incrementally check for rewinding")
Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuopp...@linux.intel.com>
---
 drivers/gpu/drm/i915/gt/selftest_lrc.c | 150 +++++++++++++++++++++++++
 1 file changed, 150 insertions(+)

diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c 
b/drivers/gpu/drm/i915/gt/selftest_lrc.c
index 91543494f595..3d088116a055 100644
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -363,6 +363,155 @@ static int live_unlite_preempt(void *arg)
        return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
 }
 
+static int live_unlite_ring(void *arg)
+{
+       struct intel_gt *gt = arg;
+       struct intel_engine_cs *engine;
+       struct igt_spinner spin;
+       enum intel_engine_id id;
+       int err = 0;
+
+       /*
+        * Setup a preemption event that will cause almost the entire ring
+        * to be unwound, potentially fooling our intel_ring_direction()
+        * into emitting a forward lite-restore instead of the rollback.
+        */
+
+       if (igt_spinner_init(&spin, gt))
+               return -ENOMEM;
+
+       for_each_engine(engine, gt, id) {
+               struct intel_context *ce[2] = {};
+               struct i915_request *rq;
+               struct igt_live_test t;
+               int n;
+
+               if (!intel_engine_has_preemption(engine))
+                       continue;
+
+               if (!intel_engine_can_store_dword(engine))
+                       continue;
+
+               if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
+                       err = -EIO;
+                       break;
+               }
+               engine_heartbeat_disable(engine);
+
+               for (n = 0; n < ARRAY_SIZE(ce); n++) {
+                       struct intel_context *tmp;
+
+                       tmp = intel_context_create(engine);
+                       if (IS_ERR(tmp)) {
+                               err = PTR_ERR(tmp);
+                               goto err_ce;
+                       }
+
+                       err = intel_context_pin(tmp);
+                       if (err) {
+                               intel_context_put(tmp);
+                               goto err_ce;
+                       }
+
+                       memset32(tmp->ring->vaddr,
+                                0xdeadbeef, /* trigger a hang if executed */
+                                tmp->ring->vma->size / sizeof(u32));
+
+                       ce[n] = tmp;
+               }
+
+               rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
+               if (IS_ERR(rq)) {
+                       err = PTR_ERR(rq);
+                       goto err_ce;
+               }
+
+               i915_request_get(rq);
+               rq->sched.attr.priority = I915_PRIORITY_BARRIER;
+               i915_request_add(rq);
+
+               if (!igt_wait_for_spinner(&spin, rq)) {
+                       intel_gt_set_wedged(gt);
+                       i915_request_put(rq);
+                       err = -ETIME;
+                       goto err_ce;
+               }
+
+               /* Fill the ring, until we will cause a wrap */
+               n = 0;
+               while (intel_ring_direction(ce[0]->ring,
+                                           rq->wa_tail,
+                                           ce[0]->ring->tail) <= 0) {
+                       struct i915_request *tmp;
+
+                       tmp = intel_context_create_request(ce[0]);
+                       if (IS_ERR(tmp)) {
+                               err = PTR_ERR(tmp);
+                               i915_request_put(rq);
+                               goto err_ce;
+                       }
+
+                       i915_request_add(tmp);
+                       intel_engine_flush_submission(engine);
+                       n++;
+               }
+               intel_engine_flush_submission(engine);
+               pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, 
emit:%x, rq.tail:%x}\n",
+                        engine->name, n,
+                        ce[0]->ring->size,
+                        ce[0]->ring->tail,
+                        ce[0]->ring->emit,
+                        rq->tail);
+               GEM_BUG_ON(intel_ring_direction(ce[0]->ring,
+                                               rq->tail,
+                                               ce[0]->ring->tail) <= 0);
+               i915_request_put(rq);
+
+               /* Create a second request to preempt the first ring */
+               rq = intel_context_create_request(ce[1]);
+               if (IS_ERR(rq)) {
+                       err = PTR_ERR(rq);
+                       goto err_ce;
+               }
+
+               rq->sched.attr.priority = I915_PRIORITY_BARRIER;
+               i915_request_get(rq);
+               i915_request_add(rq);
+
+               err = wait_for_submit(engine, rq, HZ / 2);
+               i915_request_put(rq);
+               if (err) {
+                       pr_err("%s: preemption request was not submited\n",
+                              engine->name);
+                       err = -ETIME;
+               }
+
+               pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, 
emit:%x }\n",
+                        engine->name,
+                        ce[0]->ring->tail, ce[0]->ring->emit,
+                        ce[1]->ring->tail, ce[1]->ring->emit);
+
+err_ce:
+               intel_engine_flush_submission(engine);
+               igt_spinner_end(&spin);
+               for (n = 0; n < ARRAY_SIZE(ce); n++) {
+                       if (IS_ERR_OR_NULL(ce[n]))
+                               break;
+
+                       intel_context_unpin(ce[n]);
+                       intel_context_put(ce[n]);
+               }
+               engine_heartbeat_enable(engine);
+               if (igt_live_test_end(&t))
+                       err = -EIO;
+               if (err)
+                       break;
+       }
+
+       igt_spinner_fini(&spin);
+       return err;
+}
+
 static int live_pin_rewind(void *arg)
 {
        struct intel_gt *gt = arg;
@@ -4374,6 +4523,7 @@ int intel_execlists_live_selftests(struct 
drm_i915_private *i915)
                SUBTEST(live_sanitycheck),
                SUBTEST(live_unlite_switch),
                SUBTEST(live_unlite_preempt),
+               SUBTEST(live_unlite_ring),
                SUBTEST(live_pin_rewind),
                SUBTEST(live_hold_reset),
                SUBTEST(live_error_interrupt),
-- 
2.20.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to