[Mesa-dev] [PATCH 06/16] i965/gen7.5: Enable hardware-generated binding tables on render path.

2013-10-10 Thread Abdiel Janulgue
This patch implements the binding table enable command, which is also
used to allocate a binding table pool into which hardware-generated
binding table entries are flushed.

Each binding table offset in the binding table pool is unique to
each shader stage that is enabled within a batch.

In addition, this change inserts the required brw_tracked_state objects
to enable hw-generated binding tables in the normal render path.

v3: Update binding table pool offsets on new geometry shader constant
buffers, geometry shader program cache, and fragment shader program
cache. The previous implementation failed to catch these flags.

Signed-off-by: Abdiel Janulgue 
---
 src/mesa/drivers/dri/i965/brw_binding_tables.c |   87 
 src/mesa/drivers/dri/i965/brw_context.c|1 +
 src/mesa/drivers/dri/i965/brw_context.h|1 +
 src/mesa/drivers/dri/i965/brw_defines.h|3 +
 src/mesa/drivers/dri/i965/brw_state.h  |5 ++
 src/mesa/drivers/dri/i965/brw_state_upload.c   |3 +
 6 files changed, 100 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c 
b/src/mesa/drivers/dri/i965/brw_binding_tables.c
index 9d15bac..534fbb8 100644
--- a/src/mesa/drivers/dri/i965/brw_binding_tables.c
+++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c
@@ -164,6 +164,93 @@ const struct brw_tracked_state brw_gs_binding_table = {
.emit = brw_gs_upload_binding_table,
 };
 
+/** Enable hardware-generated binding tables for the resource streamer:
+ * emits the pool allocation, a state-cache invalidate, and VS/PS table edits.
+ */
+void
+gen7_enable_hw_binding_tables(struct brw_context *brw)
+{
+   if (!brw->has_resource_streamer)
+  return; /* nothing is emitted without the resource streamer */
+
+   if (!brw->hw_bt_pool)
+  gen7_update_hw_bt(brw); /* no pool yet: the update path allocates one */
+
+   BEGIN_BATCH(3);
+   OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (3 - 2));
+   OUT_RELOC(brw->hw_bt_pool, I915_GEM_DOMAIN_SAMPLER, 0,
+ HSW_BINDING_TABLE_ALLOC_OFFSET); /* presumably pool base plus enable bits -- confirm */
+   OUT_RELOC(brw->hw_bt_pool, I915_GEM_DOMAIN_SAMPLER, 0,
+ brw->hw_bt_pool->size); /* NOTE(review): pool is dereferenced without checking that allocation succeeded */
+   ADVANCE_BATCH();
+
+   /* Pipe control workaround: state cache invalidate following the pool allocation command */
+   BEGIN_BATCH(4);
+   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
+   OUT_BATCH(PIPE_CONTROL_STATE_CACHE_INVALIDATE);
+   OUT_BATCH(0); /* address */
+   OUT_BATCH(0); /* write data */
+   ADVANCE_BATCH();
+
+   /* Block-clear any on-chip binding table entries that may be
+  left over from the previous batch, for VS and then PS. */
+   BEGIN_BATCH(3);
+   OUT_BATCH(_3DSTATE_BINDING_TABLE_EDIT_VS << 16 | (3 - 2));
+   OUT_BATCH(0x << 16 | 0x3 ); /* NOTE(review): hex literal looks truncated in this copy -- confirm against the original patch */
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   BEGIN_BATCH(3);
+   OUT_BATCH(_3DSTATE_BINDING_TABLE_EDIT_PS << 16 | (3 - 2));
+   OUT_BATCH(0x << 16 | 0x3 ); /* NOTE(review): same truncated hex literal as the VS edit above */
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+}
+
+void
+gen7_update_hw_bt(struct brw_context *brw)
+{ /* Assign new VS/WM binding table offsets in the pool; replace the pool when it is missing or full. */
+   static const int bt_size = 256 * sizeof(uint16_t); /* bytes per table: 256 uint16_t entries */
+
+   if (!brw->has_resource_streamer)
+  return; /* offsets and pool are untouched without the resource streamer */
+
+   if (brw->hw_bt_pool &&
+   (brw->wm.base.bind_bo_offset + bt_size) < brw->hw_bt_pool->size) { /* NOTE(review): checks one bt_size of headroom, but two tables are placed below -- confirm */
+  brw->vs.base.bind_bo_offset = brw->wm.base.bind_bo_offset + bt_size; /* VS table follows the previous WM table */
+  brw->wm.base.bind_bo_offset = brw->vs.base.bind_bo_offset + bt_size; /* WM table follows the new VS table */
+   } else {
+  brw->vs.base.bind_bo_offset = 64; /* restart near the pool start; NOTE(review): reason for 64 is not shown here */
+  brw->wm.base.bind_bo_offset = brw->vs.base.bind_bo_offset + bt_size;
+  drm_intel_bo_unreference(brw->hw_bt_pool); /* NOTE(review): pool is NULL on first use -- confirm unreference accepts NULL */
+  brw->hw_bt_pool = drm_intel_bo_alloc(brw->bufmgr, "hw_bt",
+   65536, 4096); /* presumably size 65536, alignment 4096; NOTE(review): result is not checked */
+   }
+}
+
+const struct brw_tracked_state gen7_hw_binding_tables = { /* tracked-state entry for the enable/pool-alloc commands */
+   .dirty = {
+  .mesa = 0,
+  .brw = BRW_NEW_BATCH, /* presumably re-emitted at the start of every batch -- confirm */
+  .cache = 0
+   },
+   .emit = gen7_enable_hw_binding_tables
+};
+
+const struct brw_tracked_state gen7_hw_bt_update = { /* tracked-state entry that moves the VS/WM pool offsets */
+   .dirty = {
+  .mesa = 0,
+  .brw = (BRW_NEW_BATCH |
+  BRW_NEW_VS_CONSTBUF |
+  BRW_NEW_GS_CONSTBUF | /* GS constant buffer flag listed alongside the VS one */
+  BRW_NEW_SURFACES),
+  .cache = (CACHE_NEW_VS_PROG |
+CACHE_NEW_GS_PROG | /* GS and WM program cache flags listed alongside the VS one */
+CACHE_NEW_WM_PROG)
+   },
+   .emit = gen7_update_hw_bt
+};
+
 /** @} */
 
 /**
diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 776d221..ad70517 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -497,6 +497,7 @@ brwCreateContext(int api,
 
brw_fs_alloc_reg_sets(brw);
brw_vec4_alloc_reg_set(brw);
+   brw->hw_bt_pool = 0;
 
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
   brw_init_shader_time(brw);
diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 1151163..f4c7824 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1320,6 +1320,7 @@ struct brw_context
   } reg_sets[2];
} wm;
 
+   drm_intel_bo *hw_bt_pool;
 
struct {
   uint32_t state_offset;
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 7dfb2b9..3530bbf 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/sr

[Mesa-dev] [PATCH 06/16] i965/gen7.5: Enable hardware-generated binding tables on render path.

2013-10-08 Thread Abdiel Janulgue
This patch implements the binding table enable command, which is also
used to allocate a binding table pool into which hardware-generated
binding table entries are flushed.

Each binding table offset in the binding table pool is unique to
each shader stage that is enabled within a batch.

In addition, this change inserts the required brw_tracked_state objects
to enable hw-generated binding tables in the normal render path.

Signed-off-by: Abdiel Janulgue 
---
 src/mesa/drivers/dri/i965/brw_binding_tables.c |   84 
 src/mesa/drivers/dri/i965/brw_context.c|1 +
 src/mesa/drivers/dri/i965/brw_context.h|1 +
 src/mesa/drivers/dri/i965/brw_defines.h|3 +
 src/mesa/drivers/dri/i965/brw_state.h  |5 ++
 src/mesa/drivers/dri/i965/brw_state_upload.c   |3 +
 6 files changed, 97 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c 
b/src/mesa/drivers/dri/i965/brw_binding_tables.c
index 9d15bac..afbf667 100644
--- a/src/mesa/drivers/dri/i965/brw_binding_tables.c
+++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c
@@ -164,6 +164,90 @@ const struct brw_tracked_state brw_gs_binding_table = {
.emit = brw_gs_upload_binding_table,
 };
 
+/** Enable hardware-generated binding tables for the resource streamer:
+ * emits the pool allocation, a state-cache invalidate, and VS/PS table edits.
+ */
+void
+gen7_enable_hw_binding_tables(struct brw_context *brw)
+{
+   if (!brw->has_resource_streamer)
+  return; /* nothing is emitted without the resource streamer */
+
+   if (!brw->hw_bt_pool)
+  gen7_update_hw_bt(brw); /* no pool yet: the update path allocates one */
+
+   BEGIN_BATCH(3);
+   OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (3 - 2));
+   OUT_RELOC(brw->hw_bt_pool, I915_GEM_DOMAIN_SAMPLER, 0,
+ HSW_BINDING_TABLE_ALLOC_OFFSET); /* presumably pool base plus enable bits -- confirm */
+   OUT_RELOC(brw->hw_bt_pool, I915_GEM_DOMAIN_SAMPLER, 0,
+ brw->hw_bt_pool->size); /* NOTE(review): pool is dereferenced without checking that allocation succeeded */
+   ADVANCE_BATCH();
+
+   /* Pipe control workaround: state cache invalidate following the pool allocation command */
+   BEGIN_BATCH(4);
+   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
+   OUT_BATCH(PIPE_CONTROL_STATE_CACHE_INVALIDATE);
+   OUT_BATCH(0); /* address */
+   OUT_BATCH(0); /* write data */
+   ADVANCE_BATCH();
+
+   /* Block-clear any on-chip binding table entries that may be
+  left over from the previous batch, for VS and then PS. */
+   BEGIN_BATCH(3);
+   OUT_BATCH(_3DSTATE_BINDING_TABLE_EDIT_VS << 16 | (3 - 2));
+   OUT_BATCH(0x << 16 | 0x3 ); /* NOTE(review): hex literal looks truncated in this copy -- confirm against the original patch */
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   BEGIN_BATCH(3);
+   OUT_BATCH(_3DSTATE_BINDING_TABLE_EDIT_PS << 16 | (3 - 2));
+   OUT_BATCH(0x << 16 | 0x3 ); /* NOTE(review): same truncated hex literal as the VS edit above */
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+}
+
+void
+gen7_update_hw_bt(struct brw_context *brw)
+{ /* Assign new VS/WM binding table offsets in the pool; replace the pool when it is missing or full. */
+   static const int bt_size = 256 * sizeof(uint16_t); /* bytes per table: 256 uint16_t entries */
+
+   if (!brw->has_resource_streamer)
+  return; /* offsets and pool are untouched without the resource streamer */
+
+   if (brw->hw_bt_pool &&
+   (brw->wm.base.bind_bo_offset + bt_size) < brw->hw_bt_pool->size) { /* NOTE(review): checks one bt_size of headroom, but two tables are placed below -- confirm */
+  brw->vs.base.bind_bo_offset = brw->wm.base.bind_bo_offset + bt_size; /* VS table follows the previous WM table */
+  brw->wm.base.bind_bo_offset = brw->vs.base.bind_bo_offset + bt_size; /* WM table follows the new VS table */
+   } else {
+  brw->vs.base.bind_bo_offset = 64; /* restart near the pool start; NOTE(review): reason for 64 is not shown here */
+  brw->wm.base.bind_bo_offset = brw->vs.base.bind_bo_offset + bt_size;
+  drm_intel_bo_unreference(brw->hw_bt_pool); /* NOTE(review): pool is NULL on first use -- confirm unreference accepts NULL */
+  brw->hw_bt_pool = drm_intel_bo_alloc(brw->bufmgr, "hw_bt",
+   65536, 4096); /* presumably size 65536, alignment 4096; NOTE(review): result is not checked */
+   }
+}
+
+const struct brw_tracked_state gen7_hw_binding_tables = { /* tracked-state entry for the enable/pool-alloc commands */
+   .dirty = {
+  .mesa = 0,
+  .brw = BRW_NEW_BATCH, /* presumably re-emitted at the start of every batch -- confirm */
+  .cache = 0
+   },
+   .emit = gen7_enable_hw_binding_tables
+};
+
+const struct brw_tracked_state gen7_hw_bt_update = { /* tracked-state entry that moves the VS/WM pool offsets */
+   .dirty = {
+  .mesa = 0,
+  .brw = (BRW_NEW_BATCH |
+  BRW_NEW_VS_CONSTBUF |
+  BRW_NEW_SURFACES),
+  .cache = CACHE_NEW_VS_PROG /* NOTE(review): only the VS program cache flag is listed -- confirm GS/WM changes need no update */
+   },
+   .emit = gen7_update_hw_bt
+};
+
 /** @} */
 
 /**
diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 776d221..ad70517 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -497,6 +497,7 @@ brwCreateContext(int api,
 
brw_fs_alloc_reg_sets(brw);
brw_vec4_alloc_reg_set(brw);
+   brw->hw_bt_pool = 0;
 
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
   brw_init_shader_time(brw);
diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 1151163..f4c7824 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1320,6 +1320,7 @@ struct brw_context
   } reg_sets[2];
} wm;
 
+   drm_intel_bo *hw_bt_pool;
 
struct {
   uint32_t state_offset;
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 7dfb2b9..3530bbf 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1244,6 +1244,9 @@ enum brw_message_target {
 #define _3DSTATE_BINDING_TABLE_POINTERS_GS 0x7829 /* GEN7+ */
 #define _3DSTATE_BINDING_TABLE_POINTERS_PS 0x782A /* GEN7+ */
 
+#define _3DSTATE_BINDING_TABLE_POOL_ALLOC   0x7919 /* GEN7.5+ */
+# define HSW_BINDING_TABLE_