add/remove: 1/1 grow/shrink: 5/4 up/down: 391/-578 (-187)
function                                     old     new   delta
execlists_submit_ports                       262     471    +209
port_assign.isra                               -     136    +136
capture                                     6344    6359     +15
reset_common_ring                            438     452     +14
execlists_submit_request                     228     238     +10
gen8_init_common_ring                        334     341      +7
intel_engine_is_idle                         106     105      -1
i915_engine_info                            2314    2290     -24
__i915_gem_set_wedged_BKL                    485     411     -74
intel_lrc_irq_handler                       1789    1604    -185
execlists_update_context                     294       -    -294

The most important change there is the improvement to
intel_lrc_irq_handler and execlists_submit_ports (a net improvement, since
execlists_update_context is now inlined).

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c        |  32 ++++---
 drivers/gpu/drm/i915/i915_gem.c            |   6 +-
 drivers/gpu/drm/i915/i915_gpu_error.c      |  13 ++-
 drivers/gpu/drm/i915/i915_guc_submission.c |  18 ++--
 drivers/gpu/drm/i915/intel_engine_cs.c     |   2 +-
 drivers/gpu/drm/i915/intel_lrc.c           | 133 ++++++++++++++++-------------
 drivers/gpu/drm/i915/intel_ringbuffer.h    |   8 +-
 7 files changed, 120 insertions(+), 92 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index d689e511744e..0273591c43ad 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3317,6 +3317,7 @@ static int i915_engine_info(struct seq_file *m, void *unused)
                if (i915.enable_execlists) {
                        u32 ptr, read, write;
                        struct rb_node *rb;
+                       unsigned int idx;
 
                        seq_printf(m, "\tExeclist status: 0x%08x %08x\n",
                                   I915_READ(RING_EXECLIST_STATUS_LO(engine)),
@@ -3334,8 +3335,7 @@ static int i915_engine_info(struct seq_file *m, void *unused)
                        if (read > write)
                                write += GEN8_CSB_ENTRIES;
                        while (read < write) {
-                               unsigned int idx = ++read % GEN8_CSB_ENTRIES;
-
+                               idx = ++read % GEN8_CSB_ENTRIES;
                                seq_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n",
                                           idx,
                                           I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)),
@@ -3343,21 +3343,19 @@ static int i915_engine_info(struct seq_file *m, void *unused)
                        }
 
                        rcu_read_lock();
-                       rq = READ_ONCE(engine->execlist_port[0].request);
-                       if (rq) {
-                               seq_printf(m, "\t\tELSP[0] count=%d, ",
-                                          engine->execlist_port[0].count);
-                               print_request(m, rq, "rq: ");
-                       } else {
-                               seq_printf(m, "\t\tELSP[0] idle\n");
-                       }
-                       rq = READ_ONCE(engine->execlist_port[1].request);
-                       if (rq) {
-                               seq_printf(m, "\t\tELSP[1] count=%d, ",
-                                          engine->execlist_port[1].count);
-                               print_request(m, rq, "rq: ");
-                       } else {
-                               seq_printf(m, "\t\tELSP[1] idle\n");
+                       for (idx = 0; idx < ARRAY_SIZE(engine->execlist_port); idx++) {
+                               unsigned int count;
+
+                               rq = port_unpack(&engine->execlist_port[idx],
+                                                count);
+                               if (rq) {
+                                       seq_printf(m, "\t\tELSP[%d] count=%d, ",
+                                                  idx, count);
+                                       print_request(m, rq, "rq: ");
+                               } else {
+                                       seq_printf(m, "\t\tELSP[%d] idle\n",
+                                                  idx);
+                               }
                        }
                        rcu_read_unlock();
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 82638895ad20..946050bf4282 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2967,12 +2967,14 @@ static void engine_set_wedged(struct intel_engine_cs *engine)
         */
 
        if (i915.enable_execlists) {
+               struct execlist_port *port = engine->execlist_port;
                unsigned long flags;
+               unsigned int n;
 
                spin_lock_irqsave(&engine->timeline->lock, flags);
 
-               i915_gem_request_put(engine->execlist_port[0].request);
-               i915_gem_request_put(engine->execlist_port[1].request);
+               for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++)
+                       i915_gem_request_put(port_request(&port[n]));
                memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
                engine->execlist_queue = RB_ROOT;
                engine->execlist_first = NULL;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 8effc59f5cb5..c86a3bd89468 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1316,12 +1316,17 @@ static void engine_record_requests(struct intel_engine_cs *engine,
 static void error_record_engine_execlists(struct intel_engine_cs *engine,
                                          struct drm_i915_error_engine *ee)
 {
+       const struct execlist_port *port = engine->execlist_port;
        unsigned int n;
 
-       for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++)
-               if (engine->execlist_port[n].request)
-                       record_request(engine->execlist_port[n].request,
-                                      &ee->execlist[n]);
+       for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
+               struct drm_i915_gem_request *rq = port_request(&port[n]);
+
+               if (!rq)
+                       break;
+
+               record_request(rq, &ee->execlist[n]);
+       }
 }
 
 static void record_context(struct drm_i915_error_context *e,
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 1642fff9cf13..370373c97b81 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -658,7 +658,7 @@ static void nested_enable_signaling(struct drm_i915_gem_request *rq)
 static bool i915_guc_dequeue(struct intel_engine_cs *engine)
 {
        struct execlist_port *port = engine->execlist_port;
-       struct drm_i915_gem_request *last = port[0].request;
+       struct drm_i915_gem_request *last = port[0].request_count;
        struct rb_node *rb;
        bool submit = false;
 
@@ -672,7 +672,7 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
                        if (port != engine->execlist_port)
                                break;
 
-                       i915_gem_request_assign(&port->request, last);
+                       i915_gem_request_assign(&port->request_count, last);
                        nested_enable_signaling(last);
                        port++;
                }
@@ -688,7 +688,7 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
                submit = true;
        }
        if (submit) {
-               i915_gem_request_assign(&port->request, last);
+               i915_gem_request_assign(&port->request_count, last);
                nested_enable_signaling(last);
                engine->execlist_first = rb;
        }
@@ -705,17 +705,19 @@ static void i915_guc_irq_handler(unsigned long data)
        bool submit;
 
        do {
-               rq = port[0].request;
+               rq = port[0].request_count;
                while (rq && i915_gem_request_completed(rq)) {
                        trace_i915_gem_request_out(rq);
                        i915_gem_request_put(rq);
-                       port[0].request = port[1].request;
-                       port[1].request = NULL;
-                       rq = port[0].request;
+
+                       port[0].request_count = port[1].request_count;
+                       port[1].request_count = NULL;
+
+                       rq = port[0].request_count;
                }
 
                submit = false;
-               if (!port[1].request)
+               if (!port[1].request_count)
                        submit = i915_guc_dequeue(engine);
        } while (submit);
 }
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 854e8e0c836b..fca7160a29c2 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1096,7 +1096,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
                return false;
 
        /* Both ports drained, no more ELSP submission? */
-       if (engine->execlist_port[0].request)
+       if (port_request(&engine->execlist_port[0]))
                return false;
 
        /* Ring stopped? */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index c8f7c631fc1f..fcc74caaf291 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -342,39 +342,32 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
 
 static void execlists_submit_ports(struct intel_engine_cs *engine)
 {
-       struct drm_i915_private *dev_priv = engine->i915;
        struct execlist_port *port = engine->execlist_port;
        u32 __iomem *elsp =
-               dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine));
-       u64 desc[2];
-
-       GEM_BUG_ON(port[0].count > 1);
-       if (!port[0].count)
-               execlists_context_status_change(port[0].request,
-                                               INTEL_CONTEXT_SCHEDULE_IN);
-       desc[0] = execlists_update_context(port[0].request);
-       GEM_DEBUG_EXEC(port[0].context_id = upper_32_bits(desc[0]));
-       port[0].count++;
-
-       if (port[1].request) {
-               GEM_BUG_ON(port[1].count);
-               execlists_context_status_change(port[1].request,
-                                               INTEL_CONTEXT_SCHEDULE_IN);
-               desc[1] = execlists_update_context(port[1].request);
-               GEM_DEBUG_EXEC(port[1].context_id = upper_32_bits(desc[1]));
-               port[1].count = 1;
-       } else {
-               desc[1] = 0;
-       }
-       GEM_BUG_ON(desc[0] == desc[1]);
-
-       /* You must always write both descriptors in the order below. */
-       writel(upper_32_bits(desc[1]), elsp);
-       writel(lower_32_bits(desc[1]), elsp);
+               engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine));
+       unsigned int n;
+
+       for (n = ARRAY_SIZE(engine->execlist_port); n--; ) {
+               struct drm_i915_gem_request *rq;
+               unsigned int count;
+               u64 desc;
+
+               rq = port_unpack(&port[n], count);
+               if (rq) {
+                       GEM_BUG_ON(count > !n);
+                       if (!count++)
+                               execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
+                       port[n].request_count = port_pack(rq, count);
+                       desc = execlists_update_context(rq);
+                       GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
+               } else {
+                       GEM_BUG_ON(!n);
+                       desc = 0;
+               }
 
-       writel(upper_32_bits(desc[0]), elsp);
-       /* The context is automatically loaded after the following */
-       writel(lower_32_bits(desc[0]), elsp);
+               writel(upper_32_bits(desc), elsp);
+               writel(lower_32_bits(desc), elsp);
+       }
 }
 
 static bool ctx_single_port_submission(const struct i915_gem_context *ctx)
@@ -395,6 +388,18 @@ static bool can_merge_ctx(const struct i915_gem_context *prev,
        return true;
 }
 
+static void port_assign(struct execlist_port *port,
+                       struct drm_i915_gem_request *rq)
+{
+       GEM_BUG_ON(rq == port_request(port));
+
+       if (port->request_count)
+               i915_gem_request_put(port_request(port));
+
+       port->request_count =
+               port_pack(i915_gem_request_get(rq), port_count(port));
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
        struct drm_i915_gem_request *last;
@@ -402,7 +407,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
        struct rb_node *rb;
        bool submit = false;
 
-       last = port->request;
+       last = port_request(port);
        if (last)
                /* WaIdleLiteRestore:bdw,skl
                 * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL
@@ -412,7 +417,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                 */
                last->tail = last->wa_tail;
 
-       GEM_BUG_ON(port[1].request);
+       GEM_BUG_ON(port[1].request_count);
 
        /* Hardware submission is through 2 ports. Conceptually each port
         * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
@@ -469,7 +474,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
                        GEM_BUG_ON(last->ctx == cursor->ctx);
 
-                       i915_gem_request_assign(&port->request, last);
+                       if (submit)
+                               port_assign(port, last);
                        port++;
                }
 
@@ -484,7 +490,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                submit = true;
        }
        if (submit) {
-               i915_gem_request_assign(&port->request, last);
+               port_assign(port, last);
                engine->execlist_first = rb;
        }
        spin_unlock_irq(&engine->timeline->lock);
@@ -495,14 +501,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 
 static bool execlists_elsp_idle(struct intel_engine_cs *engine)
 {
-       return !engine->execlist_port[0].request;
+       return !port_count(&engine->execlist_port[0]);
 }
 
 static bool execlists_elsp_ready(const struct intel_engine_cs *engine)
 {
        const struct execlist_port *port = engine->execlist_port;
 
-       return port[0].count + port[1].count < 2;
+       return port_count(&port[0]) + port_count(&port[1]) < 2;
 }
 
 /*
@@ -543,7 +549,9 @@ static void intel_lrc_irq_handler(unsigned long data)
                tail = GEN8_CSB_WRITE_PTR(head);
                head = GEN8_CSB_READ_PTR(head);
                while (head != tail) {
+                       struct drm_i915_gem_request *rq;
                        unsigned int status;
+                       unsigned int count;
 
                        if (++head == GEN8_CSB_ENTRIES)
                                head = 0;
@@ -573,20 +581,24 @@ static void intel_lrc_irq_handler(unsigned long data)
                        GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) !=
                                         port[0].context_id);
 
-                       GEM_BUG_ON(port[0].count == 0);
-                       if (--port[0].count == 0) {
+                       rq = port_unpack(&port[0], count);
+                       GEM_BUG_ON(count == 0);
+                       if (--count == 0) {
                                GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED);
-                               GEM_BUG_ON(!i915_gem_request_completed(port[0].request));
-                               execlists_context_status_change(port[0].request,
-                                                               INTEL_CONTEXT_SCHEDULE_OUT);
+                               GEM_BUG_ON(!i915_gem_request_completed(rq));
+                               execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
+
+                               trace_i915_gem_request_out(rq);
+                               i915_gem_request_put(rq);
 
-                               trace_i915_gem_request_out(port[0].request);
-                               i915_gem_request_put(port[0].request);
                                port[0] = port[1];
                                memset(&port[1], 0, sizeof(port[1]));
+                       } else {
+                               port[0].request_count = port_pack(rq, count);
                        }
 
-                       GEM_BUG_ON(port[0].count == 0 &&
+                       /* After the final element, the hw should be idle */
+                       GEM_BUG_ON(port_count(&port[0]) == 0 &&
                                   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
                }
 
@@ -1139,11 +1151,6 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine)
        return ret;
 }
 
-static u32 port_seqno(struct execlist_port *port)
-{
-       return port->request ? port->request->global_seqno : 0;
-}
-
 static int gen8_init_common_ring(struct intel_engine_cs *engine)
 {
        struct drm_i915_private *dev_priv = engine->i915;
@@ -1168,12 +1175,22 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine)
        /* After a GPU reset, we may have requests to replay */
        clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
        if (!i915.enable_guc_submission && !execlists_elsp_idle(engine)) {
-               DRM_DEBUG_DRIVER("Restarting %s from requests [0x%x, 0x%x]\n",
-                                engine->name,
-                                port_seqno(&engine->execlist_port[0]),
-                                port_seqno(&engine->execlist_port[1]));
-               engine->execlist_port[0].count = 0;
-               engine->execlist_port[1].count = 0;
+               struct execlist_port *port = engine->execlist_port;
+               unsigned int n;
+
+               for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
+                       if (!port[n].request_count)
+                               break;
+
+                       DRM_DEBUG_DRIVER("Restarting %s from 0x%x [%d]\n",
+                                        engine->name,
+                                        port_request(&port[n])->global_seqno,
+                                        n);
+
+                       /* Discard the current inflight count */
+                       port[n].request_count = port_request(&port[n]);
+               }
+
                execlists_submit_ports(engine);
        }
 
@@ -1252,13 +1269,13 @@ static void reset_common_ring(struct intel_engine_cs *engine,
        intel_ring_update_space(request->ring);
 
        /* Catch up with any missed context-switch interrupts */
-       if (request->ctx != port[0].request->ctx) {
-               i915_gem_request_put(port[0].request);
+       if (request->ctx != port_request(&port[0])->ctx) {
+               i915_gem_request_put(port_request(&port[0]));
                port[0] = port[1];
                memset(&port[1], 0, sizeof(port[1]));
        }
 
-       GEM_BUG_ON(request->ctx != port[0].request->ctx);
+       GEM_BUG_ON(request->ctx != port_request(&port[0])->ctx);
 
        /* Reset WaIdleLiteRestore:bdw,skl as well */
        request->tail =
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index a82a0807f64d..51497653c830 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -373,8 +373,12 @@ struct intel_engine_cs {
        /* Execlists */
        struct tasklet_struct irq_tasklet;
        struct execlist_port {
-               struct drm_i915_gem_request *request;
-               unsigned int count;
+               struct drm_i915_gem_request *request_count;
+#define EXECLIST_COUNT_BITS 2
+#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
+#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
+#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
+#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
                GEM_DEBUG_DECL(u32 context_id);
        } execlist_port[2];
        struct rb_root execlist_queue;
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to