Previously whenever a primitive is drawn the driver would call _mesa_check_conditional_render which blocks waiting for the result of the query to determine whether to render. On Gen7+ there is a bit in the 3DPRIMITIVE command which can be used to disable the primitive based on the value of a state bit. This state bit can be set based on whether two registers have different values using the MI_PREDICATE command. We can load these two registers with the pixel count values stored in the query begin and end to implement conditional rendering without stalling.
Unfortunately these two source registers were not in the whitelist of available registers in the kernel driver until v3.19. This patch uses the command parser version from intel_screen to detect whether to attempt to set the predicate data registers. The predicate enable bit is currently only used for drawing 3D primitives. For blits, clears, bitmaps, copypixels and drawpixels it still causes a stall. For most of these it would probably just work to call the new brw_check_conditional_render function instead of _mesa_check_conditional_render because they already work in terms of rendering primitives. However it's a bit trickier for blits because it can use the BLT ring or the blorp codepath. I think these operations are less useful for conditional rendering than rendering primitives so it might be best to leave it for a later patch. v2: Use the command parser version to detect whether we can write to the predicate data registers instead of trying to execute a register load command. v3: Simple rebase --- src/mesa/drivers/dri/i965/Makefile.sources | 1 + src/mesa/drivers/dri/i965/brw_conditional_render.c | 167 +++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_context.c | 4 + src/mesa/drivers/dri/i965/brw_context.h | 21 +++ src/mesa/drivers/dri/i965/brw_defines.h | 1 + src/mesa/drivers/dri/i965/brw_draw.c | 16 +- src/mesa/drivers/dri/i965/brw_queryobj.c | 17 ++- src/mesa/drivers/dri/i965/intel_extensions.c | 5 + src/mesa/drivers/dri/i965/intel_reg.h | 23 +++ 9 files changed, 243 insertions(+), 12 deletions(-) create mode 100644 src/mesa/drivers/dri/i965/brw_conditional_render.c diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 1ae93e1..a24c20a 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -18,6 +18,7 @@ i965_FILES = \ brw_clip_unfilled.c \ brw_clip_util.c \ brw_compute.c \ + brw_conditional_render.c \ brw_context.c \ brw_context.h \ brw_cs.cpp \ diff --git a/src/mesa/drivers/dri/i965/brw_conditional_render.c b/src/mesa/drivers/dri/i965/brw_conditional_render.c new file mode 100644 index 0000000..e676aa0 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_conditional_render.c @@ -0,0 +1,167 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Neil Roberts <n...@linux.intel.com> + */ + +/** @file brw_conditional_render.c + * + * Support for conditional rendering based on query objects + * (GL_NV_conditional_render, GL_ARB_conditional_render_inverted) on Gen7+. + */ + +#include "main/imports.h" +#include "main/condrender.h" + +#include "brw_context.h" +#include "brw_defines.h" +#include "intel_batchbuffer.h" + +static void +set_predicate_enable(struct brw_context *brw, + bool value) +{ + if (value) + brw->predicate.state = BRW_PREDICATE_STATE_RENDER; + else + brw->predicate.state = BRW_PREDICATE_STATE_DONT_RENDER; +} + +static void +load_64bit_register(struct brw_context *brw, + uint32_t reg, + drm_intel_bo *bo, + uint32_t offset) +{ + int i; + + for (i = 0; i < 2; i++) { + brw_load_register_mem(brw, + reg + i * 4, + bo, + I915_GEM_DOMAIN_INSTRUCTION, + 0, /* write domain */ + offset + i * 4); + } +} + +static void +set_predicate_for_result(struct brw_context *brw, + struct brw_query_object *query, + bool inverted) +{ + int load_op; + + assert(query->bo != NULL); + + load_64bit_register(brw, MI_PREDICATE_SRC0, query->bo, 0); + load_64bit_register(brw, MI_PREDICATE_SRC1, query->bo, 8); + + if (inverted) + load_op = MI_PREDICATE_LOADOP_LOAD; + else + load_op = MI_PREDICATE_LOADOP_LOADINV; + + BEGIN_BATCH(1); + OUT_BATCH(GEN7_MI_PREDICATE | + load_op | + MI_PREDICATE_COMBINEOP_SET | + MI_PREDICATE_COMPAREOP_SRCS_EQUAL); + ADVANCE_BATCH(); + + brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT; +} + +static void +brw_begin_conditional_render(struct gl_context *ctx, + struct gl_query_object *q, + GLenum mode) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_query_object *query = (struct brw_query_object *) q; + bool inverted; + + if (!brw->predicate.supported) + return; + + switch (mode) { + case GL_QUERY_WAIT: + case GL_QUERY_NO_WAIT: + case GL_QUERY_BY_REGION_WAIT: + case GL_QUERY_BY_REGION_NO_WAIT: + inverted = false; + break; + case GL_QUERY_WAIT_INVERTED: + case GL_QUERY_NO_WAIT_INVERTED: + case GL_QUERY_BY_REGION_WAIT_INVERTED: + case GL_QUERY_BY_REGION_NO_WAIT_INVERTED: + inverted = true; + break; + default: + unreachable("Unexpected conditional render mode"); + } + + /* If there are already samples from a BLT operation or if the query object + * is ready then we can avoid looking at the values in the buffer and just + * decide whether to draw using the CPU without stalling */ + if (query->Base.Result || query->Base.Ready) + set_predicate_enable(brw, (query->Base.Result != 0) ^ inverted); + else + set_predicate_for_result(brw, query, inverted); +} + +static void +brw_end_conditional_render(struct gl_context *ctx, + struct gl_query_object *q) +{ + struct brw_context *brw = brw_context(ctx); + + /* When there is no longer a conditional render in progress it should + * always render */ + brw->predicate.state = BRW_PREDICATE_STATE_RENDER; +} + +void +brw_init_conditional_render_functions(struct dd_function_table *functions) +{ + functions->BeginConditionalRender = brw_begin_conditional_render; + functions->EndConditionalRender = brw_end_conditional_render; +} + +bool +brw_check_conditional_render(struct brw_context *brw) +{ + if (brw->predicate.supported) { + /* In some cases it is possible to determine that the primitives should + * be skipped without needing the predicate enable bit and still without + * stalling */ + return brw->predicate.state != BRW_PREDICATE_STATE_DONT_RENDER; + } else { + if (brw->ctx.Query.CondRenderQuery) { + perf_debug("Conditional rendering is implemented in software and may " + "stall.\n"); + } + + return _mesa_check_conditional_render(&brw->ctx); + } +} diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index fd7420a..673529a 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -289,6 +289,8 @@ brw_init_driver_functions(struct brw_context *brw, else gen4_init_queryobj_functions(functions); brw_init_compute_functions(functions); + if (brw->gen >= 7) + brw_init_conditional_render_functions(functions); functions->QuerySamplesForFormat = brw_query_samples_for_format; @@ -891,6 +893,8 @@ brwCreateContext(gl_api api, brw->gs.enabled = false; brw->sf.viewport_transform_enable = true; + brw->predicate.state = BRW_PREDICATE_STATE_RENDER; + ctx->VertexProgram._MaintainTnlProgram = true; ctx->FragmentProgram._MaintainTexEnvProgram = true; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 834aaa4..b9383fa 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -966,6 +966,18 @@ struct brw_stage_state uint32_t sampler_offset; }; +enum brw_predicate_state { + /* The first two states are used if we can determine whether to draw + * without having to look at the values in the query object buffer. This + * will happen if there is no conditional render in progress, if the query + * object is already completed or if something else has already added + * samples to the preliminary result such as via a BLT command. */ + BRW_PREDICATE_STATE_RENDER, + BRW_PREDICATE_STATE_DONT_RENDER, + /* In this case whether to draw or not depends on the result of an + * MI_PREDICATE command so the predicate enable bit needs to be checked */ + BRW_PREDICATE_STATE_USE_BIT +}; /** * brw_context is derived from gl_context. @@ -1402,6 +1414,11 @@ struct brw_context } query; struct { + enum brw_predicate_state state; + bool supported; + } predicate; + + struct { /** A map from pipeline statistics counter IDs to MMIO addresses. */ const int *statistics_registers; @@ -1600,6 +1617,10 @@ void brw_write_depth_count(struct brw_context *brw, drm_intel_bo *bo, int idx); void brw_store_register_mem64(struct brw_context *brw, drm_intel_bo *bo, uint32_t reg, int idx); +/** brw_conditional_render.c */ +void brw_init_conditional_render_functions(struct dd_function_table *functions); +bool brw_check_conditional_render(struct brw_context *brw); + /** intel_batchbuffer.c */ void brw_load_register_mem(struct brw_context *brw, uint32_t reg, diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 83d7a35..11cb3fa 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -51,6 +51,7 @@ # define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 15) # define GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 15) # define GEN7_3DPRIM_INDIRECT_PARAMETER_ENABLE (1 << 10) +# define GEN7_3DPRIM_PREDICATE_ENABLE (1 << 8) /* DW1 */ # define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL (0 << 8) # define GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM (1 << 8) diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 96e2369..a7164db 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -178,6 +178,7 @@ static void brw_emit_prim(struct brw_context *brw, int verts_per_instance; int vertex_access_type; int indirect_flag; + int predicate_enable; DBG("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode), prim->start, prim->count); @@ -258,10 +259,14 @@ static void brw_emit_prim(struct brw_context *brw, indirect_flag = 0; } - if (brw->gen >= 7) { + if (brw->predicate.state == BRW_PREDICATE_STATE_USE_BIT) + predicate_enable = GEN7_3DPRIM_PREDICATE_ENABLE; + else + predicate_enable = 0; + BEGIN_BATCH(7); - OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag); + OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2) | indirect_flag | predicate_enable); OUT_BATCH(hw_prim | vertex_access_type); } else { BEGIN_BATCH(6); @@ -561,12 +566,7 @@ void brw_draw_prims( struct gl_context *ctx, assert(unused_tfb_object == NULL); - if (ctx->Query.CondRenderQuery) { - perf_debug("Conditional rendering is implemented in software and may " - "stall. This should be fixed in the driver.\n"); - } - - if (!_mesa_check_conditional_render(ctx)) + if (!brw_check_conditional_render(brw)) return; /* Handle primitive restart if needed */ diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c index 667c900..f0a50fe 100644 --- a/src/mesa/drivers/dri/i965/brw_queryobj.c +++ b/src/mesa/drivers/dri/i965/brw_queryobj.c @@ -66,10 +66,19 @@ brw_write_timestamp(struct brw_context *brw, drm_intel_bo *query_bo, int idx) void brw_write_depth_count(struct brw_context *brw, drm_intel_bo *query_bo, int idx) { - brw_emit_pipe_control_write(brw, - PIPE_CONTROL_WRITE_DEPTH_COUNT - | PIPE_CONTROL_DEPTH_STALL, - query_bo, idx * sizeof(uint64_t), 0, 0); + uint32_t flags; + + flags = (PIPE_CONTROL_WRITE_DEPTH_COUNT | + PIPE_CONTROL_DEPTH_STALL); + + /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM + * command when loading the values into the predicate source registers for + * conditional rendering */ + if (brw->predicate.supported) + flags |= PIPE_CONTROL_FLUSH_ENABLE; + + brw_emit_pipe_control_write(brw, flags, query_bo, + idx * sizeof(uint64_t), 0, 0); } /** diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index c3eee31..cafb774 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -320,6 +320,8 @@ intelInitExtensions(struct gl_context *ctx) } } + brw->predicate.supported = false; + if (brw->gen >= 7) { ctx->Extensions.ARB_conservative_depth = true; ctx->Extensions.ARB_derivative_control = true; @@ -333,6 +335,9 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.ARB_transform_feedback2 = true; ctx->Extensions.ARB_transform_feedback3 = true; ctx->Extensions.ARB_transform_feedback_instanced = true; + + if (brw->intelScreen->cmd_parser_version >= 2) + brw->predicate.supported = true; } /* Only enable this in core profile because other parts of Mesa behave diff --git a/src/mesa/drivers/dri/i965/intel_reg.h b/src/mesa/drivers/dri/i965/intel_reg.h index 488fb5b..bd14e18 100644 --- a/src/mesa/drivers/dri/i965/intel_reg.h +++ b/src/mesa/drivers/dri/i965/intel_reg.h @@ -48,6 +48,20 @@ #define GEN7_MI_LOAD_REGISTER_MEM (CMD_MI | (0x29 << 23)) # define MI_LOAD_REGISTER_MEM_USE_GGTT (1 << 22) +/* Manipulate the predicate bit based on some register values. Only on Gen7+ */ +#define GEN7_MI_PREDICATE (CMD_MI | (0xC << 23)) +# define MI_PREDICATE_LOADOP_KEEP (0 << 6) +# define MI_PREDICATE_LOADOP_LOAD (2 << 6) +# define MI_PREDICATE_LOADOP_LOADINV (3 << 6) +# define MI_PREDICATE_COMBINEOP_SET (0 << 3) +# define MI_PREDICATE_COMBINEOP_AND (1 << 3) +# define MI_PREDICATE_COMBINEOP_OR (2 << 3) +# define MI_PREDICATE_COMBINEOP_XOR (3 << 3) +# define MI_PREDICATE_COMPAREOP_TRUE (0 << 0) +# define MI_PREDICATE_COMPAREOP_FALSE (1 << 0) +# define MI_PREDICATE_COMPAREOP_SRCS_EQUAL (2 << 0) +# define MI_PREDICATE_COMPAREOP_DELTAS_EQUAL (3 << 0) + /** @{ * * PIPE_CONTROL operation, a combination MI_FLUSH and register write with @@ -69,6 +83,7 @@ #define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1 << 10) /* GM45+ only */ #define PIPE_CONTROL_ISP_DIS (1 << 9) #define PIPE_CONTROL_INTERRUPT_ENABLE (1 << 8) +#define PIPE_CONTROL_FLUSH_ENABLE (1 << 7) /* Gen7+ only */ /* GT */ #define PIPE_CONTROL_DATA_CACHE_INVALIDATE (1 << 5) #define PIPE_CONTROL_VF_CACHE_INVALIDATE (1 << 4) @@ -147,3 +162,11 @@ # define GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC (1 << 1) # define GEN8_HIZ_PMA_MASK_BITS \ ((GEN8_HIZ_NP_PMA_FIX_ENABLE | GEN8_HIZ_NP_EARLY_Z_FAILS_DISABLE) << 16) + +/* Predicate registers */ +#define MI_PREDICATE_SRC0 0x2400 +#define MI_PREDICATE_SRC1 0x2408 +#define MI_PREDICATE_DATA 0x2410 +#define MI_PREDICATE_RESULT 0x2418 +#define MI_PREDICATE_RESULT_1 0x241C +#define MI_PREDICATE_RESULT_2 0x2214 -- 1.9.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev