This brings over the batch-wrap-prevention and aperture-space-checking code from the normal brw_draw.c path, so that we don't need to flush the batch every time a blorp operation is executed.
There's a risk here if the intel_emit_post_sync_nonzero_flush() call isn't high enough up in the state emit sequences -- before, we implicitly had one at the batch flush before any state was emitted, so Mesa's workaround emits didn't really matter. Improves cairo-gl performance by 13.7733% +/- 1.74876% (n=30/32). No statistically significant performance difference on unigine tropics (n=10). No statistically significant performance difference on openarena (n=755). No statistically significant performance difference on Lightsmark (n=15, though this may be an issue of test power -- looks like a ~0.3% performance hit). Reduces low-resolution GLB 2.7 performance by 0.604517% +/- 0.140544% (n=132/133). --- I've got the test system running more Lightsmark now -- the bimodal distribution of its results was killing the stats, and I'd bumped the power cable and it ran out of battery and died. I'm a little mystified by the small GLB and possibly LM regressions. My theory was the first-post-swap-batch throttling, except that we've got about 5 batches per frame on GLB. src/mesa/drivers/dri/i965/brw_blorp.cpp | 51 +++++++++++++++++++++++++++++++- src/mesa/drivers/dri/i965/brw_blorp.h | 4 --- src/mesa/drivers/dri/i965/gen6_blorp.cpp | 14 --------- src/mesa/drivers/dri/i965/gen7_blorp.cpp | 1 - 4 files changed, 50 insertions(+), 20 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp b/src/mesa/drivers/dri/i965/brw_blorp.cpp index 92bee3e..d02c660 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp @@ -21,6 +21,7 @@ * IN THE SOFTWARE. 
*/ +#include <errno.h> #include "intel_batchbuffer.h" #include "intel_fbo.h" @@ -191,7 +192,26 @@ intel_hiz_exec(struct intel_context *intel, struct intel_mipmap_tree *mt, void brw_blorp_exec(struct intel_context *intel, const brw_blorp_params *params) { - struct brw_context *brw = brw_context(&intel->ctx); + struct gl_context *ctx = &intel->ctx; + struct brw_context *brw = brw_context(ctx); + uint32_t estimated_max_batch_usage = 1500; + bool check_aperture_failed_once = false; + + /* Flush the sampler and render caches. We definitely need to flush the + * sampler cache so that we get updated contents from the render cache for + * the glBlitFramebuffer() source. Also, we are sometimes warned in the + * docs to flush the cache between reinterpretations of the same surface + * data with different formats, which blorp does for stencil and depth + * data. + */ + intel_batchbuffer_emit_mi_flush(intel); + +retry: + intel_batchbuffer_require_space(intel, estimated_max_batch_usage, false); + intel_batchbuffer_save_state(intel); + drm_intel_bo *saved_bo = intel->batch.bo; + uint32_t saved_used = intel->batch.used; + uint32_t saved_state_batch_offset = intel->batch.state_batch_offset; switch (intel->gen) { case 6: @@ -206,6 +226,35 @@ brw_blorp_exec(struct intel_context *intel, const brw_blorp_params *params) break; } + /* Make sure we didn't wrap the batch unintentionally, and make sure we + * reserved enough space that a wrap will never happen. + */ + assert(intel->batch.bo == saved_bo); + assert((intel->batch.used - saved_used) * 4 + + (saved_state_batch_offset - intel->batch.state_batch_offset) < + estimated_max_batch_usage); + /* Shut up compiler warnings on release build */ + (void)saved_bo; + (void)saved_used; + (void)saved_state_batch_offset; + + /* Check if the blorp op we just did would make our batch likely to fail to + * map all the BOs into the GPU at batch exec time later. If so, flush the + * batch and try again with nothing else in the batch. 
+ */ + if (dri_bufmgr_check_aperture_space(&intel->batch.bo, 1)) { + if (!check_aperture_failed_once) { + check_aperture_failed_once = true; + intel_batchbuffer_reset_to_saved(intel); + intel_batchbuffer_flush(intel); + goto retry; + } else { + int ret = intel_batchbuffer_flush(intel); + WARN_ONCE(ret == -ENOSPC, + "i965: blorp emit exceeded available aperture space\n"); + } + } + if (unlikely(intel->always_flush_batch)) intel_batchbuffer_flush(intel); diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h index ffc27cc..a726201 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.h +++ b/src/mesa/drivers/dri/i965/brw_blorp.h @@ -353,10 +353,6 @@ void gen6_blorp_init(struct brw_context *brw); void -gen6_blorp_emit_batch_head(struct brw_context *brw, - const brw_blorp_params *params); - -void gen6_blorp_emit_state_base_address(struct brw_context *brw, const brw_blorp_params *params); diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp b/src/mesa/drivers/dri/i965/gen6_blorp.cpp index 3ccd90e..de6007d 100644 --- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp @@ -45,19 +45,6 @@ * sizeof(float)) /** \} */ -void -gen6_blorp_emit_batch_head(struct brw_context *brw, - const brw_blorp_params *params) -{ - struct gl_context *ctx = &brw->intel.ctx; - - /* To ensure that the batch contains only the resolve, flush the batch - * before beginning and after finishing emitting the resolve packets. 
- */ - intel_flush(ctx); -} - - /** * CMD_STATE_BASE_ADDRESS * @@ -1057,7 +1044,6 @@ gen6_blorp_exec(struct intel_context *intel, uint32_t wm_bind_bo_offset = 0; uint32_t prog_offset = params->get_wm_prog(brw, &prog_data); - gen6_blorp_emit_batch_head(brw, params); gen6_emit_3dstate_multisample(brw, params->num_samples); gen6_emit_3dstate_sample_mask(brw, params->num_samples, 1.0, false, ~0u); gen6_blorp_emit_state_base_address(brw, params); diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp index 822f954..8e0f7b0 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp @@ -856,7 +856,6 @@ gen7_blorp_exec(struct intel_context *intel, uint32_t sampler_offset = 0; uint32_t prog_offset = params->get_wm_prog(brw, &prog_data); - gen6_blorp_emit_batch_head(brw, params); gen6_emit_3dstate_multisample(brw, params->num_samples); gen6_emit_3dstate_sample_mask(brw, params->num_samples, 1.0, false, ~0u); gen6_blorp_emit_state_base_address(brw, params); -- 1.8.3.rc0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev