[Mesa-dev] [PATCH 06/16] i965/gen7.5: Enable hardware-generated binding tables on render path.
This patch implements the binding table enable command, which is also used to allocate a binding table pool into which hardware-generated binding table entries are flushed. Each binding table offset in the binding table pool is unique to each shader stage that is enabled within a batch. In addition, this change inserts the required brw_tracked_state objects to enable hw-generated binding tables in the normal render path. v3: Update binding table pool offsets on new geometry shader constant buffers, geometry shader program cache, and fragment shader program cache. The previous implementation failed to catch these flags. Signed-off-by: Abdiel Janulgue --- src/mesa/drivers/dri/i965/brw_binding_tables.c | 87 src/mesa/drivers/dri/i965/brw_context.c|1 + src/mesa/drivers/dri/i965/brw_context.h|1 + src/mesa/drivers/dri/i965/brw_defines.h|3 + src/mesa/drivers/dri/i965/brw_state.h |5 ++ src/mesa/drivers/dri/i965/brw_state_upload.c |3 + 6 files changed, 100 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c b/src/mesa/drivers/dri/i965/brw_binding_tables.c index 9d15bac..534fbb8 100644 --- a/src/mesa/drivers/dri/i965/brw_binding_tables.c +++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c @@ -164,6 +164,93 @@ const struct brw_tracked_state brw_gs_binding_table = { .emit = brw_gs_upload_binding_table, }; +/** + * Hardware-generated binding tables for the resource streamer + */ +void +gen7_enable_hw_binding_tables(struct brw_context *brw) +{ + if (!brw->has_resource_streamer) + return; + + if (!brw->hw_bt_pool) + gen7_update_hw_bt(brw); + + BEGIN_BATCH(3); + OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (3 - 2)); + OUT_RELOC(brw->hw_bt_pool, I915_GEM_DOMAIN_SAMPLER, 0, + HSW_BINDING_TABLE_ALLOC_OFFSET); + OUT_RELOC(brw->hw_bt_pool, I915_GEM_DOMAIN_SAMPLER, 0, + brw->hw_bt_pool->size); + ADVANCE_BATCH(); + + /* Pipe control workaround */ + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(PIPE_CONTROL_STATE_CACHE_INVALIDATE); + 
OUT_BATCH(0); /* address */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); + + /* Do a block clear for existing on-chip binding table entries + that might have stuck from the old batch*/ + BEGIN_BATCH(3); + OUT_BATCH(_3DSTATE_BINDING_TABLE_EDIT_VS << 16 | (3 - 2)); + OUT_BATCH(0x << 16 | 0x3 ); + OUT_BATCH(0); + ADVANCE_BATCH(); + + BEGIN_BATCH(3); + OUT_BATCH(_3DSTATE_BINDING_TABLE_EDIT_PS << 16 | (3 - 2)); + OUT_BATCH(0x << 16 | 0x3 ); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + +void +gen7_update_hw_bt(struct brw_context *brw) +{ + static const int bt_size = 256 * sizeof(uint16_t); + + if (!brw->has_resource_streamer) + return; + + if (brw->hw_bt_pool && + (brw->wm.base.bind_bo_offset + bt_size) < brw->hw_bt_pool->size) { + brw->vs.base.bind_bo_offset = brw->wm.base.bind_bo_offset + bt_size; + brw->wm.base.bind_bo_offset = brw->vs.base.bind_bo_offset + bt_size; + } else { + brw->vs.base.bind_bo_offset = 64; + brw->wm.base.bind_bo_offset = brw->vs.base.bind_bo_offset + bt_size; + drm_intel_bo_unreference(brw->hw_bt_pool); + brw->hw_bt_pool = drm_intel_bo_alloc(brw->bufmgr, "hw_bt", + 65536, 4096); + } +} + +const struct brw_tracked_state gen7_hw_binding_tables = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH, + .cache = 0 + }, + .emit = gen7_enable_hw_binding_tables +}; + +const struct brw_tracked_state gen7_hw_bt_update = { + .dirty = { + .mesa = 0, + .brw = (BRW_NEW_BATCH | + BRW_NEW_VS_CONSTBUF | + BRW_NEW_GS_CONSTBUF | + BRW_NEW_SURFACES), + .cache = (CACHE_NEW_VS_PROG | +CACHE_NEW_GS_PROG | +CACHE_NEW_WM_PROG) + }, + .emit = gen7_update_hw_bt +}; + /** @} */ /** diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 776d221..ad70517 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -497,6 +497,7 @@ brwCreateContext(int api, brw_fs_alloc_reg_sets(brw); brw_vec4_alloc_reg_set(brw); + brw->hw_bt_pool = 0; if (INTEL_DEBUG & DEBUG_SHADER_TIME) 
brw_init_shader_time(brw); diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 1151163..f4c7824 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1320,6 +1320,7 @@ struct brw_context } reg_sets[2]; } wm; + drm_intel_bo *hw_bt_pool; struct { uint32_t state_offset; diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 7dfb2b9..3530bbf 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/sr
[Mesa-dev] [PATCH 06/16] i965/gen7.5: Enable hardware-generated binding tables on render path.
This patch implements the binding table enable command, which is also used to allocate a binding table pool into which hardware-generated binding table entries are flushed. Each binding table offset in the binding table pool is unique to each shader stage that is enabled within a batch. In addition, this change inserts the required brw_tracked_state objects to enable hw-generated binding tables in the normal render path. Signed-off-by: Abdiel Janulgue --- src/mesa/drivers/dri/i965/brw_binding_tables.c | 84 src/mesa/drivers/dri/i965/brw_context.c|1 + src/mesa/drivers/dri/i965/brw_context.h|1 + src/mesa/drivers/dri/i965/brw_defines.h|3 + src/mesa/drivers/dri/i965/brw_state.h |5 ++ src/mesa/drivers/dri/i965/brw_state_upload.c |3 + 6 files changed, 97 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c b/src/mesa/drivers/dri/i965/brw_binding_tables.c index 9d15bac..afbf667 100644 --- a/src/mesa/drivers/dri/i965/brw_binding_tables.c +++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c @@ -164,6 +164,90 @@ const struct brw_tracked_state brw_gs_binding_table = { .emit = brw_gs_upload_binding_table, }; +/** + * Hardware-generated binding tables for the resource streamer + */ +void +gen7_enable_hw_binding_tables(struct brw_context *brw) +{ + if (!brw->has_resource_streamer) + return; + + if (!brw->hw_bt_pool) + gen7_update_hw_bt(brw); + + BEGIN_BATCH(3); + OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (3 - 2)); + OUT_RELOC(brw->hw_bt_pool, I915_GEM_DOMAIN_SAMPLER, 0, + HSW_BINDING_TABLE_ALLOC_OFFSET); + OUT_RELOC(brw->hw_bt_pool, I915_GEM_DOMAIN_SAMPLER, 0, + brw->hw_bt_pool->size); + ADVANCE_BATCH(); + + /* Pipe control workaround */ + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2)); + OUT_BATCH(PIPE_CONTROL_STATE_CACHE_INVALIDATE); + OUT_BATCH(0); /* address */ + OUT_BATCH(0); /* write data */ + ADVANCE_BATCH(); + + /* Do a block clear for existing on-chip binding table entries + that might have stuck from the old batch*/ + 
BEGIN_BATCH(3); + OUT_BATCH(_3DSTATE_BINDING_TABLE_EDIT_VS << 16 | (3 - 2)); + OUT_BATCH(0x << 16 | 0x3 ); + OUT_BATCH(0); + ADVANCE_BATCH(); + + BEGIN_BATCH(3); + OUT_BATCH(_3DSTATE_BINDING_TABLE_EDIT_PS << 16 | (3 - 2)); + OUT_BATCH(0x << 16 | 0x3 ); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + +void +gen7_update_hw_bt(struct brw_context *brw) +{ + static const int bt_size = 256 * sizeof(uint16_t); + + if (!brw->has_resource_streamer) + return; + + if (brw->hw_bt_pool && + (brw->wm.base.bind_bo_offset + bt_size) < brw->hw_bt_pool->size) { + brw->vs.base.bind_bo_offset = brw->wm.base.bind_bo_offset + bt_size; + brw->wm.base.bind_bo_offset = brw->vs.base.bind_bo_offset + bt_size; + } else { + brw->vs.base.bind_bo_offset = 64; + brw->wm.base.bind_bo_offset = brw->vs.base.bind_bo_offset + bt_size; + drm_intel_bo_unreference(brw->hw_bt_pool); + brw->hw_bt_pool = drm_intel_bo_alloc(brw->bufmgr, "hw_bt", + 65536, 4096); + } +} + +const struct brw_tracked_state gen7_hw_binding_tables = { + .dirty = { + .mesa = 0, + .brw = BRW_NEW_BATCH, + .cache = 0 + }, + .emit = gen7_enable_hw_binding_tables +}; + +const struct brw_tracked_state gen7_hw_bt_update = { + .dirty = { + .mesa = 0, + .brw = (BRW_NEW_BATCH | + BRW_NEW_VS_CONSTBUF | + BRW_NEW_SURFACES), + .cache = CACHE_NEW_VS_PROG + }, + .emit = gen7_update_hw_bt +}; + /** @} */ /** diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 776d221..ad70517 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -497,6 +497,7 @@ brwCreateContext(int api, brw_fs_alloc_reg_sets(brw); brw_vec4_alloc_reg_set(brw); + brw->hw_bt_pool = 0; if (INTEL_DEBUG & DEBUG_SHADER_TIME) brw_init_shader_time(brw); diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 1151163..f4c7824 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1320,6 +1320,7 @@ struct 
brw_context } reg_sets[2]; } wm; + drm_intel_bo *hw_bt_pool; struct { uint32_t state_offset; diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 7dfb2b9..3530bbf 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1244,6 +1244,9 @@ enum brw_message_target { #define _3DSTATE_BINDING_TABLE_POINTERS_GS 0x7829 /* GEN7+ */ #define _3DSTATE_BINDING_TABLE_POINTERS_PS 0x782A /* GEN7+ */ +#define _3DSTATE_BINDING_TABLE_POOL_ALLOC 0x7919 /* GEN7.5+ */ +# define HSW_BINDING_TABLE_