On 05/07/2015 04:43 PM, Pohjolainen, Topi wrote: > On Tue, Apr 28, 2015 at 11:08:00PM +0300, Abdiel Janulgue wrote: >> This patch implements the binding table enable command which is also >> used to allocate a binding table pool where hardware-generated >> binding table entries are flushed into. Each binding table offset in >> the binding table pool is unique per each shader stage that are >> enabled within a batch. >> >> Also insert the required brw_tracked_state objects to enable >> hw-generated binding tables in normal render path. >> >> Signed-off-by: Abdiel Janulgue <abdiel.janul...@linux.intel.com> >> --- >> src/mesa/drivers/dri/i965/brw_binding_tables.c | 70 >> ++++++++++++++++++++++++++ >> src/mesa/drivers/dri/i965/brw_context.c | 4 ++ >> src/mesa/drivers/dri/i965/brw_context.h | 5 ++ >> src/mesa/drivers/dri/i965/brw_state.h | 7 +++ >> src/mesa/drivers/dri/i965/brw_state_upload.c | 2 + >> src/mesa/drivers/dri/i965/intel_batchbuffer.c | 4 ++ >> 6 files changed, 92 insertions(+) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c >> b/src/mesa/drivers/dri/i965/brw_binding_tables.c >> index 459165a..a58e32e 100644 >> --- a/src/mesa/drivers/dri/i965/brw_binding_tables.c >> +++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c >> @@ -44,6 +44,11 @@ >> #include "brw_state.h" >> #include "intel_batchbuffer.h" >> >> +/* Somehow the hw-binding table pool offset must start here, otherwise >> + * the GPU will hang >> + */ >> +#define HW_BT_START_OFFSET 256; > > I think we want to understand this a little better before enabling... > >> + >> /** >> * Upload a shader stage's binding table as indirect state. >> * >> @@ -163,6 +168,71 @@ const struct brw_tracked_state brw_gs_binding_table = { >> .emit = brw_gs_upload_binding_table, >> }; >> >> +/** >> + * Hardware-generated binding tables for the resource streamer >> + */ >> +void >> +gen7_disable_hw_binding_tables(struct brw_context *brw) >> +{ >> + BEGIN_BATCH(3); >> + OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (3 - 2)); >> + OUT_BATCH(SET_FIELD(BRW_HW_BINDING_TABLE_OFF, >> BRW_HW_BINDING_TABLE_ENABLE) | >> + brw->is_haswell ? HSW_HW_BINDING_TABLE_RESERVED : 0); >> + OUT_BATCH(0); >> + ADVANCE_BATCH(); >> + >> + /* Pipe control workaround */ >> + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE); >> +} >> + >> +void >> +gen7_enable_hw_binding_tables(struct brw_context *brw) >> +{ >> + if (!brw->has_resource_streamer) { >> + gen7_disable_hw_binding_tables(brw); > > I started wondering why we really need this - RS is disabled by default and > we haven't needed to do anything to disable it before. > >> + return; >> + } >> + >> + if (!brw->hw_bt_pool.bo) { >> + /* From the BSpec, 3D Pipeline > Resource Streamer > Hardware Binding >> Tables: >> + * >> + * "A maximum of 16,383 Binding tables are allowed in any batch >> buffer." >> + */ >> + int max_size = 16383 * 4; > > But does it really need this much all the time? I guess I need to go and > read the spec.
This is actually just one re-usable buffer object sticking around for the lifetime of the context. Compare this with creating lots of bo every-time we enable the resource streamer. I think it helps with reducing the amount of relocations we have. > >> + brw->hw_bt_pool.bo = drm_intel_bo_alloc(brw->bufmgr, "hw_bt", >> + max_size, 64); >> + brw->hw_bt_pool.next_offset = HW_BT_START_OFFSET; >> + } >> + >> + uint32_t dw1 = SET_FIELD(BRW_HW_BINDING_TABLE_ON, >> BRW_HW_BINDING_TABLE_ENABLE); >> + if (brw->is_haswell) >> + dw1 |= SET_FIELD(GEN7_MOCS_L3, GEN7_HW_BT_MOCS) | >> HSW_HW_BINDING_TABLE_RESERVED; > > These are overflowing 80 columns. > >> + >> + BEGIN_BATCH(3); >> + OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (3 - 2)); >> + OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1); >> + OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, >> + brw->hw_bt_pool.bo->size); >> + ADVANCE_BATCH(); >> + >> + /* Pipe control workaround */ >> + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE); > > Would you have a spec reference for this? 3D-Media-GPGPU Engine > Resource Streamer [HSW+] > Hardware Binding Tables [HSW+] > Programming note "When switching between HW and SW binding table generation, SW must issue a state cache invalidate." > >> +} >> + >> +void >> +gen7_reset_rs_pool_offsets(struct brw_context *brw) >> +{ >> + brw->hw_bt_pool.next_offset = HW_BT_START_OFFSET; >> +} >> + >> +const struct brw_tracked_state gen7_hw_binding_tables = { >> + .dirty = { >> + .mesa = 0, >> + .brw = BRW_NEW_BATCH, >> + }, >> + .emit = gen7_enable_hw_binding_tables >> +}; >> + >> /** @} */ >> >> /** >> diff --git a/src/mesa/drivers/dri/i965/brw_context.c >> b/src/mesa/drivers/dri/i965/brw_context.c >> index c7e1e81..9c7ccae 100644 >> --- a/src/mesa/drivers/dri/i965/brw_context.c >> +++ b/src/mesa/drivers/dri/i965/brw_context.c >> @@ -953,6 +953,10 @@ intelDestroyContext(__DRIcontext * driContextPriv) >> if (brw->wm.base.scratch_bo) >> drm_intel_bo_unreference(brw->wm.base.scratch_bo); >> >> + gen7_reset_rs_pool_offsets(brw); >> + drm_intel_bo_unreference(brw->hw_bt_pool.bo); >> + brw->hw_bt_pool.bo = NULL; >> + >> drm_intel_gem_context_destroy(brw->hw_ctx); >> >> if (ctx->swrast_context) { >> diff --git a/src/mesa/drivers/dri/i965/brw_context.h >> b/src/mesa/drivers/dri/i965/brw_context.h >> index 07626af..1c72b74 100644 >> --- a/src/mesa/drivers/dri/i965/brw_context.h >> +++ b/src/mesa/drivers/dri/i965/brw_context.h >> @@ -1360,6 +1360,11 @@ struct brw_context >> uint32_t fast_clear_op; >> } wm; >> >> + /* RS hardware binding table */ >> + struct { >> + drm_intel_bo *bo; >> + uint32_t next_offset; >> + } hw_bt_pool; >> >> struct { >> uint32_t state_offset; >> diff --git a/src/mesa/drivers/dri/i965/brw_state.h >> b/src/mesa/drivers/dri/i965/brw_state.h >> index cfa67b6..d882bdd 100644 >> --- a/src/mesa/drivers/dri/i965/brw_state.h >> +++ b/src/mesa/drivers/dri/i965/brw_state.h >> @@ -130,6 +130,7 @@ extern const struct brw_tracked_state gen7_sol_state; >> extern const struct brw_tracked_state gen7_urb; >> extern const struct brw_tracked_state gen7_vs_state; >> extern const struct brw_tracked_state gen7_wm_state; >> +extern const struct brw_tracked_state gen7_hw_binding_tables; >> extern const struct brw_tracked_state haswell_cut_index; >> extern const struct brw_tracked_state gen8_blend_state; >> extern const struct brw_tracked_state gen8_disable_stages; >> @@ -297,6 +298,12 @@ gen7_upload_constant_state(struct brw_context *brw, >> const struct brw_stage_state *stage_state, >> bool active, unsigned opcode); >> >> +/* gen7_misc_state.c */ >> +void gen7_rs_control(struct brw_context *brw, int enable); >> +void gen7_enable_hw_binding_tables(struct brw_context *brw); >> +void gen7_disable_hw_binding_tables(struct brw_context *brw); >> +void gen7_reset_rs_pool_offsets(struct brw_context *brw); >> + >> #ifdef __cplusplus >> } >> #endif >> diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c >> b/src/mesa/drivers/dri/i965/brw_state_upload.c >> index ab316bf..e9f3bbd 100644 >> --- a/src/mesa/drivers/dri/i965/brw_state_upload.c >> +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c >> @@ -190,6 +190,8 @@ static const struct brw_tracked_state >> *gen7_render_atoms[] = >> &gen6_color_calc_state, /* must do before cc unit */ >> &gen6_depth_stencil_state, /* must do before cc unit */ >> >> + &gen7_hw_binding_tables, /* Enable hw-generated binding tables for >> Haswell */ >> + >> &gen6_vs_push_constants, /* Before vs_state */ >> &gen6_gs_push_constants, /* Before gs_state */ >> &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */ >> diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c >> b/src/mesa/drivers/dri/i965/intel_batchbuffer.c >> index bce5830..509e898 100644 >> --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c >> +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c >> @@ -32,6 +32,7 @@ >> #include "intel_buffers.h" >> #include "intel_fbo.h" >> #include "brw_context.h" >> +#include "brw_state.h" >> >> #include <xf86drm.h> >> #include <i915_drm.h> >> @@ -374,6 +375,9 @@ _intel_batchbuffer_flush(struct brw_context *brw, >> drm_intel_bo_wait_rendering(brw->batch.bo); >> } >> >> + if (brw->gen >= 7) >> + gen7_reset_rs_pool_offsets(brw); >> + >> /* Start a new batch buffer. */ >> brw_new_batch(brw); >> >> -- >> 1.9.1 >> >> _______________________________________________ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/mesa-dev > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev