From: Chris Wilson <ch...@chris-wilson.co.uk>

Catch up with the inflight CSB events after disabling the tasklet, but
before deciding which request was truly guilty of hanging the GPU.

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Michał Winiarski <michal.winiar...@intel.com>
Cc: Michel Thierry <michel.thie...@intel.com>
Cc: Jeff McGee <jeff.mc...@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 355 ++++++++++++++++++++++-----------------
 1 file changed, 197 insertions(+), 158 deletions(-)
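
Note for reviewers: a standalone sketch of the request-selection logic
that execlists_reset_prepare() gains below, stripped of the execlists
plumbing. The struct and function names here are illustrative only and
are not driver code: once process_csb() has drained the pending
context-switch events, walk the timeline backwards from the request
found in ELSP[0], pardon anything the GPU has already completed, and
blame the oldest request that has not.

  /* Illustrative sketch only -- not part of the patch. */
  #include <stdbool.h>
  #include <stddef.h>

  struct fake_request {
          struct fake_request *prev;      /* older request on the timeline */
          bool completed;                 /* breadcrumb already visible? */
  };

  /* Walk from the ELSP[0] request towards older requests. */
  static struct fake_request *find_guilty(struct fake_request *elsp0)
  {
          struct fake_request *rq, *active = NULL;

          for (rq = elsp0; rq; rq = rq->prev) {
                  if (rq->completed)      /* GPU got past this one: pardon */
                          break;

                  active = rq;            /* oldest incomplete so far */
          }

          return active;                  /* NULL => nothing left to blame */
  }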

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e5a3ffbc273a..beb81f13a3cc 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -828,186 +828,192 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
        local_irq_restore(flags);
 }
 
-/*
- * Check the unread Context Status Buffers and manage the submission of new
- * contexts to the ELSP accordingly.
- */
-static void execlists_submission_tasklet(unsigned long data)
+static void process_csb(struct intel_engine_cs *engine)
 {
-       struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
        struct intel_engine_execlists * const execlists = &engine->execlists;
        struct execlist_port * const port = execlists->port;
-       struct drm_i915_private *dev_priv = engine->i915;
+       struct drm_i915_private *i915 = engine->i915;
+       unsigned int head, tail;
+       const u32 *buf;
        bool fw = false;
 
-       /* We can skip acquiring intel_runtime_pm_get() here as it was taken
-        * on our behalf by the request (see i915_gem_mark_busy()) and it will
-        * not be relinquished until the device is idle (see
-        * i915_gem_idle_work_handler()). As a precaution, we make sure
-        * that all ELSP are drained i.e. we have processed the CSB,
-        * before allowing ourselves to idle and calling intel_runtime_pm_put().
-        */
-       GEM_BUG_ON(!dev_priv->gt.awake);
+       if (unlikely(execlists->csb_use_mmio)) {
+               buf = (u32 * __force)
+                       (i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
+               execlists->csb_head = -1; /* force mmio read of CSB ptrs */
+       } else {
+               /* The HWSP contains a (cacheable) mirror of the CSB */
+               buf = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
+       }
 
-       /* Prefer doing test_and_clear_bit() as a two stage operation to avoid
-        * imposing the cost of a locked atomic transaction when submitting a
-        * new request (outside of the context-switch interrupt).
+       /*
+        * The write will be ordered by the uncached read (itself
+        * a memory barrier), so we do not need another in the form
+        * of a locked instruction. The race between the interrupt
+        * handler and the split test/clear is harmless as we order
+        * our clear before the CSB read. If the interrupt arrived
+        * first between the test and the clear, we read the updated
+        * CSB and clear the bit. If the interrupt arrives as we read
+        * the CSB or later (i.e. after we had cleared the bit) the bit
+        * is set and we do a new loop.
         */
-       while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) {
-               /* The HWSP contains a (cacheable) mirror of the CSB */
-               const u32 *buf =
-                       &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
-               unsigned int head, tail;
-
-               if (unlikely(execlists->csb_use_mmio)) {
-                       buf = (u32 * __force)
-                               (dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0)));
-                       execlists->csb_head = -1; /* force mmio read of CSB ptrs */
-               }
+       __clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+       if (unlikely(execlists->csb_head == -1)) { /* following a reset */
+               intel_uncore_forcewake_get(i915, execlists->fw_domains);
+               fw = true;
+
+               head = readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
+               tail = GEN8_CSB_WRITE_PTR(head);
+               head = GEN8_CSB_READ_PTR(head);
+               execlists->csb_head = head;
+       } else {
+               const int write_idx =
+                       intel_hws_csb_write_index(i915) -
+                       I915_HWS_CSB_BUF0_INDEX;
+
+               head = execlists->csb_head;
+               tail = READ_ONCE(buf[write_idx]);
+       }
+       GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
+                 engine->name,
+                 head, GEN8_CSB_READ_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
+                 tail, GEN8_CSB_WRITE_PTR(readl(i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
+
+       while (head != tail) {
+               struct i915_request *rq;
+               unsigned int status;
+               unsigned int count;
+
+               if (++head == GEN8_CSB_ENTRIES)
+                       head = 0;
 
-               /* The write will be ordered by the uncached read (itself
-                * a memory barrier), so we do not need another in the form
-                * of a locked instruction. The race between the interrupt
-                * handler and the split test/clear is harmless as we order
-                * our clear before the CSB read. If the interrupt arrived
-                * first between the test and the clear, we read the updated
-                * CSB and clear the bit. If the interrupt arrives as we read
-                * the CSB or later (i.e. after we had cleared the bit) the bit
-                * is set and we do a new loop.
+               /*
+                * We are flying near dragons again.
+                *
+                * We hold a reference to the request in execlist_port[]
+                * but no more than that. We are operating in softirq
+                * context and so cannot hold any mutex or sleep. That
+                * prevents us stopping the requests we are processing
+                * in port[] from being retired simultaneously (the
+                * breadcrumb will be complete before we see the
+                * context-switch). As we only hold the reference to the
+                * request, any pointer chasing underneath the request
+                * is subject to a potential use-after-free. Thus we
+                * store all of the bookkeeping within port[] as
+                * required, and avoid using unguarded pointers beneath
+                * request itself. The same applies to the atomic
+                * status notifier.
                 */
-               __clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-               if (unlikely(execlists->csb_head == -1)) { /* following a reset */
-                       if (!fw) {
-                               intel_uncore_forcewake_get(dev_priv,
-                                                          execlists->fw_domains);
-                               fw = true;
-                       }
 
-                       head = readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
-                       tail = GEN8_CSB_WRITE_PTR(head);
-                       head = GEN8_CSB_READ_PTR(head);
-                       execlists->csb_head = head;
-               } else {
-                       const int write_idx =
-                               intel_hws_csb_write_index(dev_priv) -
-                               I915_HWS_CSB_BUF0_INDEX;
+               status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
+               GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
+                         engine->name, head,
+                         status, buf[2*head + 1],
+                         execlists->active);
+
+               if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+                             GEN8_CTX_STATUS_PREEMPTED))
+                       execlists_set_active(execlists,
+                                            EXECLISTS_ACTIVE_HWACK);
+               if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+                       execlists_clear_active(execlists,
+                                              EXECLISTS_ACTIVE_HWACK);
+
+               if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+                       continue;
 
-                       head = execlists->csb_head;
-                       tail = READ_ONCE(buf[write_idx]);
-               }
-               GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n",
-                         engine->name,
-                         head, GEN8_CSB_READ_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?",
-                         tail, GEN8_CSB_WRITE_PTR(readl(dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)))), fw ? "" : "?");
+               /* We should never get a COMPLETED | IDLE_ACTIVE! */
+               GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
 
-               while (head != tail) {
-                       struct i915_request *rq;
-                       unsigned int status;
-                       unsigned int count;
+               if (status & GEN8_CTX_STATUS_COMPLETE &&
+                   buf[2*head + 1] == execlists->preempt_complete_status) {
+                       GEM_TRACE("%s preempt-idle\n", engine->name);
+                       complete_preempt_context(execlists);
+                       continue;
+               }
 
-                       if (++head == GEN8_CSB_ENTRIES)
-                               head = 0;
+               if (status & GEN8_CTX_STATUS_PREEMPTED &&
+                   execlists_is_active(execlists,
+                                       EXECLISTS_ACTIVE_PREEMPT))
+                       continue;
 
-                       /* We are flying near dragons again.
-                        *
-                        * We hold a reference to the request in execlist_port[]
-                        * but no more than that. We are operating in softirq
-                        * context and so cannot hold any mutex or sleep. That
-                        * prevents us stopping the requests we are processing
-                        * in port[] from being retired simultaneously (the
-                        * breadcrumb will be complete before we see the
-                        * context-switch). As we only hold the reference to the
-                        * request, any pointer chasing underneath the request
-                        * is subject to a potential use-after-free. Thus we
-                        * store all of the bookkeeping within port[] as
-                        * required, and avoid using unguarded pointers beneath
-                        * request itself. The same applies to the atomic
-                        * status notifier.
-                        */
+               GEM_BUG_ON(!execlists_is_active(execlists,
+                                               EXECLISTS_ACTIVE_USER));
 
-                       status = READ_ONCE(buf[2 * head]); /* maybe mmio! */
-                       GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n",
-                                 engine->name, head,
-                                 status, buf[2*head + 1],
-                                 execlists->active);
-
-                       if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
-                                     GEN8_CTX_STATUS_PREEMPTED))
-                               execlists_set_active(execlists,
-                                                    EXECLISTS_ACTIVE_HWACK);
-                       if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
-                               execlists_clear_active(execlists,
-                                                      EXECLISTS_ACTIVE_HWACK);
-
-                       if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
-                               continue;
+               rq = port_unpack(port, &count);
+               GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
+                         engine->name,
+                         port->context_id, count,
+                         rq ? rq->global_seqno : 0,
+                         rq ? rq_prio(rq) : 0);
+
+               /* Check the context/desc id for this event matches */
+               GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
+
+               GEM_BUG_ON(count == 0);
+               if (--count == 0) {
+                       GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
+                       GEM_BUG_ON(port_isset(&port[1]) &&
+                                  !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
+                       GEM_BUG_ON(!i915_request_completed(rq));
+                       execlists_context_schedule_out(rq);
+                       trace_i915_request_out(rq);
+                       i915_request_put(rq);
+
+                       GEM_TRACE("%s completed ctx=%d\n",
+                                 engine->name, port->context_id);
+
+                       execlists_port_complete(execlists, port);
+               } else {
+                       port_set(port, port_pack(rq, count));
+               }
 
-                       /* We should never get a COMPLETED | IDLE_ACTIVE! */
-                       GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+               /* After the final element, the hw should be idle */
+               GEM_BUG_ON(port_count(port) == 0 &&
+                          !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
+               if (port_count(port) == 0)
+                       execlists_clear_active(execlists,
+                                              EXECLISTS_ACTIVE_USER);
+       }
 
-                       if (status & GEN8_CTX_STATUS_COMPLETE &&
-                           buf[2*head + 1] == execlists->preempt_complete_status) {
-                               GEM_TRACE("%s preempt-idle\n", engine->name);
-                               complete_preempt_context(execlists);
-                               continue;
-                       }
+       if (head != execlists->csb_head) {
+               execlists->csb_head = head;
+               writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
+                      i915->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
+       }
 
-                       if (status & GEN8_CTX_STATUS_PREEMPTED &&
-                           execlists_is_active(execlists,
-                                               EXECLISTS_ACTIVE_PREEMPT))
-                               continue;
+       if (unlikely(fw))
+               intel_uncore_forcewake_put(i915, execlists->fw_domains);
+}
 
-                       GEM_BUG_ON(!execlists_is_active(execlists,
-                                                       EXECLISTS_ACTIVE_USER));
-
-                       rq = port_unpack(port, &count);
-                       GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n",
-                                 engine->name,
-                                 port->context_id, count,
-                                 rq ? rq->global_seqno : 0,
-                                 rq ? rq_prio(rq) : 0);
-
-                       /* Check the context/desc id for this event matches */
-                       GEM_DEBUG_BUG_ON(buf[2 * head + 1] != port->context_id);
-
-                       GEM_BUG_ON(count == 0);
-                       if (--count == 0) {
-                               GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
-                               GEM_BUG_ON(port_isset(&port[1]) &&
-                                          !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH));
-                               GEM_BUG_ON(!i915_request_completed(rq));
-                               execlists_context_schedule_out(rq);
-                               trace_i915_request_out(rq);
-                               i915_request_put(rq);
-
-                               GEM_TRACE("%s completed ctx=%d\n",
-                                         engine->name, port->context_id);
-
-                               execlists_port_complete(execlists, port);
-                       } else {
-                               port_set(port, port_pack(rq, count));
-                       }
+/*
+ * Check the unread Context Status Buffers and manage the submission of new
+ * contexts to the ELSP accordingly.
+ */
+static void execlists_submission_tasklet(unsigned long data)
+{
+       struct intel_engine_cs * const engine = (struct intel_engine_cs *)data;
 
-                       /* After the final element, the hw should be idle */
-                       GEM_BUG_ON(port_count(port) == 0 &&
-                                  !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
-                       if (port_count(port) == 0)
-                               execlists_clear_active(execlists,
-                                                      EXECLISTS_ACTIVE_USER);
-               }
+       /*
+        * We can skip acquiring intel_runtime_pm_get() here as it was taken
+        * on our behalf by the request (see i915_gem_mark_busy()) and it will
+        * not be relinquished until the device is idle (see
+        * i915_gem_idle_work_handler()). As a precaution, we make sure
+        * that all ELSP are drained i.e. we have processed the CSB,
+        * before allowing ourselves to idle and calling intel_runtime_pm_put().
+        */
+       GEM_BUG_ON(!engine->i915->gt.awake);
 
-               if (head != execlists->csb_head) {
-                       execlists->csb_head = head;
-                       writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
-                              dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
-               }
-       }
+       /*
+        * Prefer doing test_and_clear_bit() as a two stage operation to avoid
+        * imposing the cost of a locked atomic transaction when submitting a
+        * new request (outside of the context-switch interrupt).
+        */
+       if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted))
+               process_csb(engine);
 
-       if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
+       if (!execlists_is_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT))
                execlists_dequeue(engine);
-
-       if (fw)
-               intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
 }
 
 static void queue_request(struct intel_engine_cs *engine,
@@ -1667,6 +1673,7 @@ static struct i915_request *
 execlists_reset_prepare(struct intel_engine_cs *engine)
 {
        struct intel_engine_execlists * const execlists = &engine->execlists;
+       struct i915_request *request, *active;
 
        /*
         * Prevent request submission to the hardware until we have
@@ -1688,7 +1695,39 @@ execlists_reset_prepare(struct intel_engine_cs *engine)
                tasklet_kill(&execlists->tasklet);
        tasklet_disable(&execlists->tasklet);
 
-       return i915_gem_find_active_request(engine);
+       /*
+        * We want to flush the pending context switches: having disabled
+        * the tasklet above, we can assume exclusive access to the execlists.
+        * This allows us to catch up with an inflight preemption event,
+        * and avoid blaming an innocent request if the stall was due to the
+        * preemption itself.
+        */
+       process_csb(engine);
+
+       /*
+        * The last active request can then be no later than the last request
+        * now in ELSP[0]. So search backwards from there, so that if the GPU
+        * has advanced beyond the last CSB update, it will be pardoned.
+        */
+       active = NULL;
+       request = port_request(execlists->port);
+       if (request) {
+               unsigned long flags;
+
+               spin_lock_irqsave(&engine->timeline->lock, flags);
+               list_for_each_entry_from_reverse(request,
+                                                &engine->timeline->requests,
+                                                link) {
+                       if (__i915_request_completed(request,
+                                                    request->global_seqno))
+                               break;
+
+                       active = request;
+               }
+               spin_unlock_irqrestore(&engine->timeline->lock, flags);
+       }
+
+       return active;
 }
 
 static void reset_irq(struct intel_engine_cs *engine)
-- 
2.16.2
