Virtual engines are fleeting. They carry a reference count and may be freed
when their last request is retired. This makes them unsuitable for the
task of housing engine->retire.work so assert that it is not used.

Tvrtko tracked down an instance where we did indeed violate this rule.
In virtual_submit_request, we flush a completed request directly with
__i915_request_submit and this causes us to queue that request on the
veng's breadcrumb list and signal it. Leading us down a path where we
should not attach the retire.

v2: Always select a physical engine before submitting, and so avoid
using the veng as a signaler.

Reported-by: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
Fixes: dc93c9b69315 ("drm/i915/gt: Schedule request retirement when signaler 
idles")
Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_engine.h      |  1 +
 drivers/gpu/drm/i915/gt/intel_gt_requests.c |  3 +++
 drivers/gpu/drm/i915/gt/intel_lrc.c         | 17 ++++++++++++++---
 drivers/gpu/drm/i915/i915_request.c         |  2 ++
 4 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h 
b/drivers/gpu/drm/i915/gt/intel_engine.h
index b36ec1fddc3d..5b21ca5478c2 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -217,6 +217,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs 
*engine);
 static inline void
 intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine)
 {
+       GEM_BUG_ON(!engine->breadcrumbs.irq_work.func);
        irq_work_queue(&engine->breadcrumbs.irq_work);
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c 
b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
index 7ef1d37970f6..8a5054f21bf8 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
@@ -99,6 +99,9 @@ static bool add_retire(struct intel_engine_cs *engine,
 void intel_engine_add_retire(struct intel_engine_cs *engine,
                             struct intel_timeline *tl)
 {
+       /* We don't deal well with the engine disappearing beneath us */
+       GEM_BUG_ON(intel_engine_is_virtual(engine));
+
        if (add_retire(engine, tl))
                schedule_work(&engine->retire_work);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index c196fb90c59f..e2bd1c357afc 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -4883,6 +4883,18 @@ static void virtual_submission_tasklet(unsigned long 
data)
        local_irq_enable();
 }
 
+static void __ve_request_submit(const struct virtual_engine *ve,
+                               struct i915_request *rq)
+{
+       /*
+        * Select a real engine to act as our permanent storage
+        * and signaler for the stale request, and prevent
+        * this virtual engine from leaking into the execution state.
+        */
+       rq->engine = ve->siblings[0]; /* chosen at random! */
+       __i915_request_submit(rq);
+}
+
 static void virtual_submit_request(struct i915_request *rq)
 {
        struct virtual_engine *ve = to_virtual_engine(rq->engine);
@@ -4900,12 +4912,12 @@ static void virtual_submit_request(struct i915_request 
*rq)
        old = ve->request;
        if (old) { /* background completion event from preempt-to-busy */
                GEM_BUG_ON(!i915_request_completed(old));
-               __i915_request_submit(old);
+               __ve_request_submit(ve, old);
                i915_request_put(old);
        }
 
        if (i915_request_completed(rq)) {
-               __i915_request_submit(rq);
+               __ve_request_submit(ve, rq);
 
                ve->base.execlists.queue_priority_hint = INT_MIN;
                ve->request = NULL;
@@ -5004,7 +5016,6 @@ intel_execlists_create_virtual(struct intel_engine_cs 
**siblings,
        snprintf(ve->base.name, sizeof(ve->base.name), "virtual");
 
        intel_engine_init_active(&ve->base, ENGINE_VIRTUAL);
-       intel_engine_init_breadcrumbs(&ve->base);
        intel_engine_init_execlists(&ve->base);
 
        ve->base.cops = &virtual_context_ops;
diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c
index 0ecc2cf64216..2c45d4b93e2c 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -358,6 +358,8 @@ bool __i915_request_submit(struct i915_request *request)
        GEM_BUG_ON(!irqs_disabled());
        lockdep_assert_held(&engine->active.lock);
 
+       GEM_BUG_ON(intel_engine_is_virtual(engine));
+
        /*
         * With the advent of preempt-to-busy, we frequently encounter
         * requests that we have unsubmitted from HW, but left running
-- 
2.25.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to