[Intel-gfx] [PATCH 4/4] drm/i915: Combine multiple internal plists into the same i915_priolist bucket

2018-09-28 Thread Chris Wilson
As we are about to allow ourselves to slightly bump the user priority
into a few different sublevels, pack those internal priority lists
into the same i915_priolist to keep the rbtree compact and avoid having
to allocate the default user priority even after the internal bumping.
The downside to having a requests[] rather than a node per active list,
is that we then have to walk over the empty higher priority lists. To
compensate, we track the active buckets and use a small bitmap to skip
over any inactive ones.

v2: Use MASK of internal levels to simplify our usage.

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
Reviewed-by: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/intel_engine_cs.c  |  6 +-
 drivers/gpu/drm/i915/intel_guc_submission.c | 12 ++-
 drivers/gpu/drm/i915/intel_lrc.c| 87 ++---
 drivers/gpu/drm/i915/intel_ringbuffer.h | 13 ++-
 4 files changed, 80 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c 
b/drivers/gpu/drm/i915/intel_engine_cs.c
index 6726d57f018f..1c6143bdf5a4 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1534,10 +1534,10 @@ void intel_engine_dump(struct intel_engine_cs *engine,
count = 0;
drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
-   struct i915_priolist *p =
-   rb_entry(rb, typeof(*p), node);
+   struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+   int i;
 
-   list_for_each_entry(rq, &p->requests, sched.link) {
+   priolist_for_each_request(rq, p, i) {
if (count++ < MAX_REQUESTS_TO_SHOW - 1)
print_request(m, rq, "\t\tQ ");
else
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c 
b/drivers/gpu/drm/i915/intel_guc_submission.c
index 4874a212754c..ac862b42f6a1 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -746,30 +746,28 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
while ((rb = rb_first_cached(&execlists->queue))) {
struct i915_priolist *p = to_priolist(rb);
struct i915_request *rq, *rn;
+   int i;
 
-   list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
+   priolist_for_each_request_consume(rq, rn, p, i) {
if (last && rq->hw_context != last->hw_context) {
-   if (port == last_port) {
-   __list_del_many(&p->requests,
-   &rq->sched.link);
+   if (port == last_port)
goto done;
-   }
 
if (submit)
port_assign(port, last);
port++;
}
 
-   INIT_LIST_HEAD(&rq->sched.link);
+   list_del_init(&rq->sched.link);
 
__i915_request_submit(rq);
trace_i915_request_in(rq, port_index(port, execlists));
+
last = rq;
submit = true;
}
 
rb_erase_cached(&p->node, &execlists->queue);
-   INIT_LIST_HEAD(&p->requests);
if (p->priority != I915_PRIORITY_NORMAL)
kmem_cache_free(engine->i915->priorities, p);
}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 15345e74d8ce..0c37a3c9b719 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -259,14 +259,49 @@ intel_lr_context_descriptor_update(struct 
i915_gem_context *ctx,
ce->lrc_desc = desc;
 }
 
-static struct i915_priolist *
+static void assert_priolists(struct intel_engine_execlists * const execlists,
+int queue_priority)
+{
+   struct rb_node *rb;
+   int last_prio, i;
+
+   if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+   return;
+
+   GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
+  rb_first(&execlists->queue.rb_root));
+
+   last_prio = (queue_priority >> I915_USER_PRIORITY_SHIFT) + 1;
+   for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
+   struct i915_priolist *p = to_priolist(rb);
+
+   GEM_BUG_ON(p->priority >= last_prio);
+   last_prio = p->priority;
+
+   GEM_BUG_ON(!p->used);
+   for (i = 0; i < ARRAY_SIZE(p->requests); i++) {
+   if (list_empty(&p->requests[i]))
+   continue;
+
+  

[Intel-gfx] [PATCH 4/4] drm/i915: Combine multiple internal plists into the same i915_priolist bucket

2018-10-01 Thread Chris Wilson
As we are about to allow ourselves to slightly bump the user priority
into a few different sublevels, pack those internal priority lists
into the same i915_priolist to keep the rbtree compact and avoid having
to allocate the default user priority even after the internal bumping.
The downside to having a requests[] rather than a node per active list,
is that we then have to walk over the empty higher priority lists. To
compensate, we track the active buckets and use a small bitmap to skip
over any inactive ones.

v2: Use MASK of internal levels to simplify our usage.
v3: Prevent overflow when SHIFT is zero.

Signed-off-by: Chris Wilson 
Cc: Tvrtko Ursulin 
Reviewed-by: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/intel_engine_cs.c  |  6 +-
 drivers/gpu/drm/i915/intel_guc_submission.c | 12 ++-
 drivers/gpu/drm/i915/intel_lrc.c| 87 ++---
 drivers/gpu/drm/i915/intel_ringbuffer.h | 13 ++-
 4 files changed, 80 insertions(+), 38 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c 
b/drivers/gpu/drm/i915/intel_engine_cs.c
index 6726d57f018f..1c6143bdf5a4 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1534,10 +1534,10 @@ void intel_engine_dump(struct intel_engine_cs *engine,
count = 0;
drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
-   struct i915_priolist *p =
-   rb_entry(rb, typeof(*p), node);
+   struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+   int i;
 
-   list_for_each_entry(rq, &p->requests, sched.link) {
+   priolist_for_each_request(rq, p, i) {
if (count++ < MAX_REQUESTS_TO_SHOW - 1)
print_request(m, rq, "\t\tQ ");
else
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c 
b/drivers/gpu/drm/i915/intel_guc_submission.c
index 4874a212754c..ac862b42f6a1 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -746,30 +746,28 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
while ((rb = rb_first_cached(&execlists->queue))) {
struct i915_priolist *p = to_priolist(rb);
struct i915_request *rq, *rn;
+   int i;
 
-   list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
+   priolist_for_each_request_consume(rq, rn, p, i) {
if (last && rq->hw_context != last->hw_context) {
-   if (port == last_port) {
-   __list_del_many(&p->requests,
-   &rq->sched.link);
+   if (port == last_port)
goto done;
-   }
 
if (submit)
port_assign(port, last);
port++;
}
 
-   INIT_LIST_HEAD(&rq->sched.link);
+   list_del_init(&rq->sched.link);
 
__i915_request_submit(rq);
trace_i915_request_in(rq, port_index(port, execlists));
+
last = rq;
submit = true;
}
 
rb_erase_cached(&p->node, &execlists->queue);
-   INIT_LIST_HEAD(&p->requests);
if (p->priority != I915_PRIORITY_NORMAL)
kmem_cache_free(engine->i915->priorities, p);
}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 15345e74d8ce..4ee00f531153 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -259,14 +259,49 @@ intel_lr_context_descriptor_update(struct 
i915_gem_context *ctx,
ce->lrc_desc = desc;
 }
 
-static struct i915_priolist *
+static void assert_priolists(struct intel_engine_execlists * const execlists,
+long queue_priority)
+{
+   struct rb_node *rb;
+   long last_prio, i;
+
+   if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
+   return;
+
+   GEM_BUG_ON(rb_first_cached(&execlists->queue) !=
+  rb_first(&execlists->queue.rb_root));
+
+   last_prio = (queue_priority >> I915_USER_PRIORITY_SHIFT) + 1;
+   for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
+   struct i915_priolist *p = to_priolist(rb);
+
+   GEM_BUG_ON(p->priority >= last_prio);
+   last_prio = p->priority;
+
+   GEM_BUG_ON(!p->used);
+   for (i = 0; i < ARRAY_SIZE(p->requests); i++) {
+   if (list_empty(&p->requests[i]))
+