Re: [Intel-gfx] [PATCH v2 5/6] drm/i915/guc: use for_each_engine_id() where appropriate

2016-07-22 Thread Dave Gordon

On 22/07/16 11:04, Tvrtko Ursulin wrote:


On 21/07/16 19:15, Dave Gordon wrote:

Now that host structures are indexed by host engine-id rather than
guc_id, we can usefully convert some for_each_engine() loops to use
for_each_engine_id() and avoid multiple dereferences of engine->id.

Also a few related tweaks to cache structure members locally wherever
they're used more than once or twice, hopefully eliminating memory
references.

Signed-off-by: Dave Gordon 
---
  drivers/gpu/drm/i915/i915_debugfs.c| 17 +
  drivers/gpu/drm/i915/i915_guc_submission.c | 22 +-
  2 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
b/drivers/gpu/drm/i915/i915_debugfs.c
index 793b1d9..2106766 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2535,6 +2535,7 @@ static void i915_guc_client_info(struct seq_file
*m,
   struct i915_guc_client *client)
  {
  struct intel_engine_cs *engine;
+enum intel_engine_id id;
  uint64_t tot = 0;

  seq_printf(m, "\tPriority %d, GuC ctx index: %u, PD offset 0x%x\n",
@@ -2549,11 +2550,11 @@ static void i915_guc_client_info(struct
seq_file *m,
  seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail);
  seq_printf(m, "\tLast submission result: %d\n", client->retcode);

-for_each_engine(engine, dev_priv) {
+for_each_engine_id(engine, dev_priv, id) {
+u64 submissions = client->submissions[id];
+tot += submissions;
  seq_printf(m, "\tSubmissions: %llu %s\n",
-client->submissions[engine->id],
-engine->name);
-tot += client->submissions[engine->id];
+submissions, engine->name);
  }
  seq_printf(m, "\tTotal: %llu\n", tot);
  }
@@ -2598,11 +2599,11 @@ static int i915_guc_info(struct seq_file *m,
void *data)
  seq_printf(m, "GuC last action error code: %d\n", guc.action_err);

  seq_printf(m, "\nGuC submissions:\n");
-for_each_engine(engine, dev_priv) {
+for_each_engine_id(engine, dev_priv, id) {
+u64 submissions = guc.submissions[id];
+total += submissions;
  seq_printf(m, "\t%-24s: %10llu, last seqno 0x%08x\n",
-engine->name, guc.submissions[engine->id],
-guc.last_seqno[engine->id]);
-total += guc.submissions[engine->id];
+engine->name, submissions, guc.last_seqno[id]);
  }
  seq_printf(m, "\t%s: %llu\n", "Total", total);

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
b/drivers/gpu/drm/i915/i915_guc_submission.c
index 6756db0..ece3479 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -342,7 +342,8 @@ static void guc_init_ctx_desc(struct intel_guc *guc,

  for_each_engine_masked(engine, dev_priv, client->engines) {
  struct intel_context *ce = &ctx->engine[engine->id];
-struct guc_execlist_context *lrc = &desc.lrc[engine->guc_id];
+uint32_t guc_engine_id = engine->guc_id;
+struct guc_execlist_context *lrc = &desc.lrc[guc_engine_id];
  struct drm_i915_gem_object *obj;

  /* TODO: We have a design issue to be solved here. Only when we
@@ -361,7 +362,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
  gfx_addr = i915_gem_obj_ggtt_offset(ce->state);
  lrc->ring_lcra = gfx_addr + LRC_STATE_PN * PAGE_SIZE;
  lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) |
-(engine->guc_id << GUC_ELC_ENGINE_OFFSET);
+(guc_engine_id << GUC_ELC_ENGINE_OFFSET);

  obj = ce->ringbuf->obj;
  gfx_addr = i915_gem_obj_ggtt_offset(obj);
@@ -371,7 +372,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
  lrc->ring_next_free_location = gfx_addr;
  lrc->ring_current_tail_pointer_value = 0;

-desc.engines_used |= (1 << engine->guc_id);
+desc.engines_used |= (1 << guc_engine_id);
  }

  DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n",
@@ -461,6 +462,7 @@ static void guc_add_workqueue_item(struct
i915_guc_client *gc,
  /* wqi_len is in DWords, and does not include the one-word
header */
  const size_t wqi_size = sizeof(struct guc_wq_item);
  const u32 wqi_len = wqi_size/sizeof(u32) - 1;
+struct intel_engine_cs *engine = rq->engine;
  struct guc_process_desc *desc;
  struct guc_wq_item *wqi;
  void *base;
@@ -502,12 +504,11 @@ static void guc_add_workqueue_item(struct
i915_guc_client *gc,
  /* Now fill in the 4-word work queue item */
  wqi->header = WQ_TYPE_INORDER |
  (wqi_len << WQ_LEN_SHIFT) |
-(rq->engine->guc_id << WQ_TARGET_SHIFT) |
+(engine->guc_id << WQ_TARGET_SHIFT) |
  WQ_NO_WCFLUSH_WAIT;

  /* The GuC wants only the low-order word of the context
descriptor */
-wqi->context_desc = (u32)intel_lr_context_descri

Re: [Intel-gfx] [PATCH v2 5/6] drm/i915/guc: use for_each_engine_id() where appropriate

2016-07-22 Thread Tvrtko Ursulin


On 21/07/16 19:15, Dave Gordon wrote:

Now that host structures are indexed by host engine-id rather than
guc_id, we can usefully convert some for_each_engine() loops to use
for_each_engine_id() and avoid multiple dereferences of engine->id.

Also a few related tweaks to cache structure members locally wherever
they're used more than once or twice, hopefully eliminating memory
references.

Signed-off-by: Dave Gordon 
---
  drivers/gpu/drm/i915/i915_debugfs.c| 17 +
  drivers/gpu/drm/i915/i915_guc_submission.c | 22 +-
  2 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 793b1d9..2106766 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2535,6 +2535,7 @@ static void i915_guc_client_info(struct seq_file *m,
 struct i915_guc_client *client)
  {
struct intel_engine_cs *engine;
+   enum intel_engine_id id;
uint64_t tot = 0;

seq_printf(m, "\tPriority %d, GuC ctx index: %u, PD offset 0x%x\n",
@@ -2549,11 +2550,11 @@ static void i915_guc_client_info(struct seq_file *m,
seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail);
seq_printf(m, "\tLast submission result: %d\n", client->retcode);

-   for_each_engine(engine, dev_priv) {
+   for_each_engine_id(engine, dev_priv, id) {
+   u64 submissions = client->submissions[id];
+   tot += submissions;
seq_printf(m, "\tSubmissions: %llu %s\n",
-   client->submissions[engine->id],
-   engine->name);
-   tot += client->submissions[engine->id];
+   submissions, engine->name);
}
seq_printf(m, "\tTotal: %llu\n", tot);
  }
@@ -2598,11 +2599,11 @@ static int i915_guc_info(struct seq_file *m, void *data)
seq_printf(m, "GuC last action error code: %d\n", guc.action_err);

seq_printf(m, "\nGuC submissions:\n");
-   for_each_engine(engine, dev_priv) {
+   for_each_engine_id(engine, dev_priv, id) {
+   u64 submissions = guc.submissions[id];
+   total += submissions;
seq_printf(m, "\t%-24s: %10llu, last seqno 0x%08x\n",
-   engine->name, guc.submissions[engine->id],
-   guc.last_seqno[engine->id]);
-   total += guc.submissions[engine->id];
+   engine->name, submissions, guc.last_seqno[id]);
}
seq_printf(m, "\t%s: %llu\n", "Total", total);

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c 
b/drivers/gpu/drm/i915/i915_guc_submission.c
index 6756db0..ece3479 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -342,7 +342,8 @@ static void guc_init_ctx_desc(struct intel_guc *guc,

for_each_engine_masked(engine, dev_priv, client->engines) {
struct intel_context *ce = &ctx->engine[engine->id];
-   struct guc_execlist_context *lrc = &desc.lrc[engine->guc_id];
+   uint32_t guc_engine_id = engine->guc_id;
+   struct guc_execlist_context *lrc = &desc.lrc[guc_engine_id];
struct drm_i915_gem_object *obj;

/* TODO: We have a design issue to be solved here. Only when we
@@ -361,7 +362,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
gfx_addr = i915_gem_obj_ggtt_offset(ce->state);
lrc->ring_lcra = gfx_addr + LRC_STATE_PN * PAGE_SIZE;
lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) |
-   (engine->guc_id << GUC_ELC_ENGINE_OFFSET);
+   (guc_engine_id << GUC_ELC_ENGINE_OFFSET);

obj = ce->ringbuf->obj;
gfx_addr = i915_gem_obj_ggtt_offset(obj);
@@ -371,7 +372,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
lrc->ring_next_free_location = gfx_addr;
lrc->ring_current_tail_pointer_value = 0;

-   desc.engines_used |= (1 << engine->guc_id);
+   desc.engines_used |= (1 << guc_engine_id);
}

DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n",
@@ -461,6 +462,7 @@ static void guc_add_workqueue_item(struct i915_guc_client 
*gc,
/* wqi_len is in DWords, and does not include the one-word header */
const size_t wqi_size = sizeof(struct guc_wq_item);
const u32 wqi_len = wqi_size/sizeof(u32) - 1;
+   struct intel_engine_cs *engine = rq->engine;
struct guc_process_desc *desc;
struct guc_wq_item *wqi;
void *base;
@@ -502,12 +504,11 @@ static void guc_add_workqueue_item(struct i915_guc_client 
*gc,
/* Now fill in the 4-word work queue item */
wqi->header = WQ_TYPE_INORDER |
 

[Intel-gfx] [PATCH v2 5/6] drm/i915/guc: use for_each_engine_id() where appropriate

2016-07-21 Thread Dave Gordon
Now that host structures are indexed by host engine-id rather than
guc_id, we can usefully convert some for_each_engine() loops to use
for_each_engine_id() and avoid multiple dereferences of engine->id.

Also a few related tweaks to cache structure members locally wherever
they're used more than once or twice, hopefully eliminating memory
references.

Signed-off-by: Dave Gordon 
---
 drivers/gpu/drm/i915/i915_debugfs.c| 17 +
 drivers/gpu/drm/i915/i915_guc_submission.c | 22 +-
 2 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
b/drivers/gpu/drm/i915/i915_debugfs.c
index 793b1d9..2106766 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2535,6 +2535,7 @@ static void i915_guc_client_info(struct seq_file *m,
 struct i915_guc_client *client)
 {
struct intel_engine_cs *engine;
+   enum intel_engine_id id;
uint64_t tot = 0;
 
seq_printf(m, "\tPriority %d, GuC ctx index: %u, PD offset 0x%x\n",
@@ -2549,11 +2550,11 @@ static void i915_guc_client_info(struct seq_file *m,
seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail);
seq_printf(m, "\tLast submission result: %d\n", client->retcode);
 
-   for_each_engine(engine, dev_priv) {
+   for_each_engine_id(engine, dev_priv, id) {
+   u64 submissions = client->submissions[id];
+   tot += submissions;
seq_printf(m, "\tSubmissions: %llu %s\n",
-   client->submissions[engine->id],
-   engine->name);
-   tot += client->submissions[engine->id];
+   submissions, engine->name);
}
seq_printf(m, "\tTotal: %llu\n", tot);
 }
@@ -2598,11 +2599,11 @@ static int i915_guc_info(struct seq_file *m, void *data)
seq_printf(m, "GuC last action error code: %d\n", guc.action_err);
 
seq_printf(m, "\nGuC submissions:\n");
-   for_each_engine(engine, dev_priv) {
+   for_each_engine_id(engine, dev_priv, id) {
+   u64 submissions = guc.submissions[id];
+   total += submissions;
seq_printf(m, "\t%-24s: %10llu, last seqno 0x%08x\n",
-   engine->name, guc.submissions[engine->id],
-   guc.last_seqno[engine->id]);
-   total += guc.submissions[engine->id];
+   engine->name, submissions, guc.last_seqno[id]);
}
seq_printf(m, "\t%s: %llu\n", "Total", total);
 
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c 
b/drivers/gpu/drm/i915/i915_guc_submission.c
index 6756db0..ece3479 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -342,7 +342,8 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
 
for_each_engine_masked(engine, dev_priv, client->engines) {
struct intel_context *ce = &ctx->engine[engine->id];
-   struct guc_execlist_context *lrc = &desc.lrc[engine->guc_id];
+   uint32_t guc_engine_id = engine->guc_id;
+   struct guc_execlist_context *lrc = &desc.lrc[guc_engine_id];
struct drm_i915_gem_object *obj;
 
/* TODO: We have a design issue to be solved here. Only when we
@@ -361,7 +362,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
gfx_addr = i915_gem_obj_ggtt_offset(ce->state);
lrc->ring_lcra = gfx_addr + LRC_STATE_PN * PAGE_SIZE;
lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) |
-   (engine->guc_id << GUC_ELC_ENGINE_OFFSET);
+   (guc_engine_id << GUC_ELC_ENGINE_OFFSET);
 
obj = ce->ringbuf->obj;
gfx_addr = i915_gem_obj_ggtt_offset(obj);
@@ -371,7 +372,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
lrc->ring_next_free_location = gfx_addr;
lrc->ring_current_tail_pointer_value = 0;
 
-   desc.engines_used |= (1 << engine->guc_id);
+   desc.engines_used |= (1 << guc_engine_id);
}
 
DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n",
@@ -461,6 +462,7 @@ static void guc_add_workqueue_item(struct i915_guc_client 
*gc,
/* wqi_len is in DWords, and does not include the one-word header */
const size_t wqi_size = sizeof(struct guc_wq_item);
const u32 wqi_len = wqi_size/sizeof(u32) - 1;
+   struct intel_engine_cs *engine = rq->engine;
struct guc_process_desc *desc;
struct guc_wq_item *wqi;
void *base;
@@ -502,12 +504,11 @@ static void guc_add_workqueue_item(struct i915_guc_client 
*gc,
/* Now fill in the 4-word work queue item */
wqi->header = WQ_TYPE_INORDER |
(wqi_len << WQ_LEN_SHIFT) |
-