From: Michel Thierry <michel.thie...@intel.com>

A context switch (and execlist submission) should only happen when
other contexts are not active; otherwise, pre-emption occurs.

To ensure this, we place context switch requests in a queue; those
requests are later consumed when the corresponding context switch
interrupt is received (still TODO).
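
For illustration only (not part of this patch), the eventual
interrupt-side consumer could look roughly like the sketch below. The
handler name handle_context_switch_interrupt is hypothetical and the
exact retirement rules are still to be decided; the sketch only reuses
the lock, queue and request type introduced here:

/*
 * Hypothetical consumer sketch: on a context switch interrupt, retire
 * the request at the head of the queue and submit whatever is queued
 * next. The real handler is still TODO.
 */
static void handle_context_switch_interrupt(struct intel_engine_cs *ring)
{
        struct intel_ctx_submit_request *head;
        unsigned long flags;

        spin_lock_irqsave(&ring->execlist_lock, flags);

        head = list_first_entry_or_null(&ring->execlist_queue,
                                        struct intel_ctx_submit_request,
                                        execlist_link);
        if (head) {
                /* The head request has completed: drop our reference... */
                list_del(&head->execlist_link);
                i915_gem_context_unreference(head->ctx);
                kfree(head);

                /* ...and submit the next queued request(s), if any. */
                execlists_context_unqueue(ring);
        }

        spin_unlock_irqrestore(&ring->execlist_lock, flags);
}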

v2: Use a spinlock, do not remove the requests on unqueue (wait for
context switch completion).

Signed-off-by: Thomas Daniel <thomas.dan...@intel.com>

v3: Several rebases and code changes. Use unique ID.

v4:
- Move the queue/lock init to the late ring initialization.
- Damien's kmalloc review comments: check return, use sizeof(*req),
do not cast.

v5:
- Do not reuse drm_i915_gem_request. Instead, create our own.
- New namespace.

Signed-off-by: Michel Thierry <michel.thie...@intel.com> (v1)
Signed-off-by: Oscar Mateo <oscar.ma...@intel.com> (v2-v5)
---
 drivers/gpu/drm/i915/intel_lrc.c        | 63 +++++++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/intel_lrc.h        |  8 +++++
 drivers/gpu/drm/i915/intel_ringbuffer.h |  2 ++
 3 files changed, 71 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ceee121..68993f8 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -217,6 +217,63 @@ static int execlists_submit_context(struct intel_engine_cs *ring,
        return 0;
 }
 
+static void execlists_context_unqueue(struct intel_engine_cs *ring)
+{
+       struct intel_ctx_submit_request *req0 = NULL, *req1 = NULL;
+       struct intel_ctx_submit_request *cursor = NULL, *tmp = NULL;
+
+       if (list_empty(&ring->execlist_queue))
+               return;
+
+       /* Try to read in pairs */
+       list_for_each_entry_safe(cursor, tmp, &ring->execlist_queue, execlist_link) {
+               if (!req0)
+                       req0 = cursor;
+               else if (req0->ctx == cursor->ctx) {
+                       /* Same ctx: ignore first request, as second request
+                        * will update tail past first request's workload */
+                       list_del(&req0->execlist_link);
+                       i915_gem_context_unreference(req0->ctx);
+                       kfree(req0);
+                       req0 = cursor;
+               } else {
+                       req1 = cursor;
+                       break;
+               }
+       }
+
+       BUG_ON(execlists_submit_context(ring, req0->ctx, req0->tail,
+                       req1 ? req1->ctx : NULL, req1 ? req1->tail : 0));
+}
+
+static int execlists_context_queue(struct intel_engine_cs *ring,
+                                  struct intel_context *to,
+                                  u32 tail)
+{
+       struct intel_ctx_submit_request *req = NULL;
+       unsigned long flags;
+       bool was_empty;
+
+       req = kzalloc(sizeof(*req), GFP_KERNEL);
+       if (req == NULL)
+               return -ENOMEM;
+       req->ctx = to;
+       i915_gem_context_reference(req->ctx);
+       req->ring = ring;
+       req->tail = tail;
+
+       spin_lock_irqsave(&ring->execlist_lock, flags);
+
+       was_empty = list_empty(&ring->execlist_queue);
+       list_add_tail(&req->execlist_link, &ring->execlist_queue);
+       if (was_empty)
+               execlists_context_unqueue(ring);
+
+       spin_unlock_irqrestore(&ring->execlist_lock, flags);
+
+       return 0;
+}
+
 static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf)
 {
        struct intel_engine_cs *ring = ringbuf->ring;
@@ -403,8 +460,7 @@ void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
        if (intel_ring_stopped(ring))
                return;
 
-       /* FIXME: too cheeky, we don't even check if the ELSP is ready */
-       execlists_submit_context(ring, ctx, ringbuf->tail, NULL, 0);
+       execlists_context_queue(ring, ctx, ringbuf->tail);
 }
 
 static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
@@ -844,6 +900,9 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
        INIT_LIST_HEAD(&ring->request_list);
        init_waitqueue_head(&ring->irq_queue);
 
+       INIT_LIST_HEAD(&ring->execlist_queue);
+       spin_lock_init(&ring->execlist_lock);
+
        ret = intel_lr_context_deferred_create(dctx, ring);
        if (ret)
                return ret;
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index b59965b..14492a9 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -60,4 +60,12 @@ int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
                               u64 exec_start, u32 flags);
 u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj);
 
+struct intel_ctx_submit_request {
+       struct intel_context *ctx;
+       struct intel_engine_cs *ring;
+       u32 tail;
+
+       struct list_head execlist_link;
+};
+
 #endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c885d5c..6358823 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -223,6 +223,8 @@ struct  intel_engine_cs {
        } semaphore;
 
        /* Execlists */
+       spinlock_t execlist_lock;
+       struct list_head execlist_queue;
        u32             irq_keep_mask; /* bitmask for interrupts that should not be masked */
        int             (*emit_request)(struct intel_ringbuffer *ringbuf);
        int             (*emit_flush)(struct intel_ringbuffer *ringbuf,
-- 
1.9.0
