There are a lot of places that use PIPE_CONTROL to write a value to a buffer (either an immediate write, TIMESTAMP, or PS_DEPTH_COUNT). Creating a single function to do this seems convenient.
As part of this refactor, we now set the PPGTT/GTT selection bit correctly on Gen7+. Previously, we set bit 2 of DW2 on all platforms. This is correct for Sandybridge, but actually part of the address on Ivybridge and later! Broadwell will also increase the length of these packets by 1; with the refactoring, we should have to adjust that in substantially fewer places, giving us confidence that we've hit them all. Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> --- src/mesa/drivers/dri/i965/brw_queryobj.c | 54 ++++------------ src/mesa/drivers/dri/i965/gen6_queryobj.c | 15 ++--- src/mesa/drivers/dri/i965/intel_batchbuffer.c | 89 +++++++++++++++------------ src/mesa/drivers/dri/i965/intel_batchbuffer.h | 3 + 4 files changed, 68 insertions(+), 93 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c index 1c45ca2..31f1cab 100644 --- a/src/mesa/drivers/dri/i965/brw_queryobj.c +++ b/src/mesa/drivers/dri/i965/brw_queryobj.c @@ -49,35 +49,14 @@ void brw_write_timestamp(struct brw_context *brw, drm_intel_bo *query_bo, int idx) { - if (brw->gen >= 6) { - /* Emit workaround flushes: */ - if (brw->gen == 6) { - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_STALL_AT_SCOREBOARD); - } - - BEGIN_BATCH(5); - OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2)); - OUT_BATCH(PIPE_CONTROL_WRITE_TIMESTAMP); - OUT_RELOC(query_bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - PIPE_CONTROL_GLOBAL_GTT_WRITE | - idx * sizeof(uint64_t)); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } else { - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2) | - PIPE_CONTROL_WRITE_TIMESTAMP); - OUT_RELOC(query_bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - PIPE_CONTROL_GLOBAL_GTT_WRITE | - idx * sizeof(uint64_t)); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); + if (brw->gen == 6) { + /* Emit Sandybridge workaround flush: */ + brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_STALL_AT_SCOREBOARD); } + + brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_TIMESTAMP, + query_bo, idx * sizeof(uint64_t), 0, 0); } /** @@ -88,21 +67,10 @@ write_depth_count(struct brw_context *brw, drm_intel_bo *query_bo, int idx) { assert(brw->gen < 6); - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2) | - PIPE_CONTROL_DEPTH_STALL | PIPE_CONTROL_WRITE_DEPTH_COUNT); - /* This object could be mapped cacheable, but we don't have an exposed - * mechanism to support that. Since it's going uncached, tell GEM that - * we're writing to it. The usual clflush should be all that's required - * to pick up the results. - */ - OUT_RELOC(query_bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - PIPE_CONTROL_GLOBAL_GTT_WRITE | - (idx * sizeof(uint64_t))); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); + brw_emit_pipe_control_write(brw, + PIPE_CONTROL_WRITE_DEPTH_COUNT + | PIPE_CONTROL_DEPTH_STALL, + query_bo, idx * sizeof(uint64_t), 0, 0); } /** diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c b/src/mesa/drivers/dri/i965/gen6_queryobj.c index b96f9a2..5257127 100644 --- a/src/mesa/drivers/dri/i965/gen6_queryobj.c +++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c @@ -49,17 +49,10 @@ write_depth_count(struct brw_context *brw, drm_intel_bo *query_bo, int idx) if (brw->gen == 6) intel_emit_post_sync_nonzero_flush(brw); - BEGIN_BATCH(5); - OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2)); - OUT_BATCH(PIPE_CONTROL_DEPTH_STALL | - PIPE_CONTROL_WRITE_DEPTH_COUNT); - OUT_RELOC(query_bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - PIPE_CONTROL_GLOBAL_GTT_WRITE | - (idx * sizeof(uint64_t))); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); + brw_emit_pipe_control_write(brw, + PIPE_CONTROL_WRITE_DEPTH_COUNT + | PIPE_CONTROL_DEPTH_STALL, + query_bo, idx * sizeof(uint64_t), 0, 0); } /* diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index afc4850..ef6c7a0 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -502,6 +502,44 @@ brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags) } /** + * Emit a PIPE_CONTROL that writes to a buffer object. + * + * \p flags should contain one of the following items: + * - PIPE_CONTROL_WRITE_IMMEDIATE + * - PIPE_CONTROL_WRITE_TIMESTAMP + * - PIPE_CONTROL_WRITE_DEPTH_COUNT + */ +void +brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags, + drm_intel_bo *bo, uint32_t offset, + uint32_t imm_lower, uint32_t imm_upper) +{ + if (brw->gen >= 6) { + /* PPGTT/GGTT is selected by DW2 bit 2 on Sandybridge, but DW1 bit 24 + * on later platforms. We always use PPGTT on Gen7+. + */ + unsigned gen6_gtt = brw->gen == 6 ? PIPE_CONTROL_GLOBAL_GTT_WRITE : 0; + + BEGIN_BATCH(5); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2)); + OUT_BATCH(flags); + OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + gen6_gtt | offset); + OUT_BATCH(imm_lower); + OUT_BATCH(imm_upper); + ADVANCE_BATCH(); + } else { + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2)); + OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + PIPE_CONTROL_GLOBAL_GTT_WRITE | offset); + OUT_BATCH(imm_lower); + OUT_BATCH(imm_upper); + ADVANCE_BATCH(); + } +} + +/** * Restriction [DevSNB, DevIVB]: * * Prior to changing Depth/Stencil Buffer state (i.e. any combination of @@ -535,15 +573,11 @@ void gen7_emit_vs_workaround_flush(struct brw_context *brw) { assert(brw->gen == 7); - - BEGIN_BATCH(5); - OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2)); - OUT_BATCH(PIPE_CONTROL_DEPTH_STALL | PIPE_CONTROL_WRITE_IMMEDIATE); - OUT_RELOC(brw->batch.workaround_bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); - OUT_BATCH(0); /* write data */ - OUT_BATCH(0); /* write data */ - ADVANCE_BATCH(); + brw_emit_pipe_control_write(brw, + PIPE_CONTROL_WRITE_IMMEDIATE + | PIPE_CONTROL_DEPTH_STALL, + brw->batch.workaround_bo, 0, + 0, 0); } @@ -553,27 +587,11 @@ gen7_emit_vs_workaround_flush(struct brw_context *brw) void gen7_emit_cs_stall_flush(struct brw_context *brw) { - BEGIN_BATCH(5); - OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2)); - /* From p61 of the Ivy Bridge PRM (1.10.4 PIPE_CONTROL Command: DW1[20] - * CS Stall): - * - * One of the following must also be set: - * - Render Target Cache Flush Enable ([12] of DW1) - * - Depth Cache Flush Enable ([0] of DW1) - * - Stall at Pixel Scoreboard ([1] of DW1) - * - Depth Stall ([13] of DW1) - * - Post-Sync Operation ([13] of DW1) - * - * We choose to do a Post-Sync Operation (Write Immediate Data), since - * it seems like it will incur the least additional performance penalty. - */ - OUT_BATCH(PIPE_CONTROL_CS_STALL | PIPE_CONTROL_WRITE_IMMEDIATE); - OUT_RELOC(brw->batch.workaround_bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); + brw_emit_pipe_control_write(brw, + PIPE_CONTROL_CS_STALL + | PIPE_CONTROL_WRITE_IMMEDIATE, + brw->batch.workaround_bo, 0, + 0, 0); } @@ -623,15 +641,8 @@ intel_emit_post_sync_nonzero_flush(struct brw_context *brw) brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD); - BEGIN_BATCH(5); - OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2)); - OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE); - OUT_RELOC(brw->batch.workaround_bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - PIPE_CONTROL_GLOBAL_GTT_WRITE); - OUT_BATCH(0); /* write data */ - OUT_BATCH(0); /* write data */ - ADVANCE_BATCH(); + brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_IMMEDIATE, + brw->batch.workaround_bo, 0, 0, 0); brw->batch.need_workaround_flush = false; } diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h b/src/mesa/drivers/dri/i965/intel_batchbuffer.h index 47e8d64..159f928 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h @@ -65,6 +65,9 @@ bool intel_batchbuffer_emit_reloc_fenced(struct brw_context *brw, uint32_t write_domain, uint32_t offset); void brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags); +void brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags, + drm_intel_bo *bo, uint32_t offset, + uint32_t imm_lower, uint32_t imm_upper); void intel_batchbuffer_emit_mi_flush(struct brw_context *brw); void intel_emit_post_sync_nonzero_flush(struct brw_context *brw); void intel_emit_depth_stall_flushes(struct brw_context *brw); -- 1.8.4.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev