On Fri, Dec 27, 2013 at 7:25 AM, Chad Versace <chad.vers...@linux.intel.com> wrote: > On 12/10/2013 09:54 PM, Chia-I Wu wrote: >> >> When the depth buffer is already cleared, skip GEN6_HIZ_OP_DEPTH_CLEAR. >> This >> is made possible by tracking which slices have been cleared in >> "struct intel_mipmap_level". The hiz_cleared flag is unset when the depth >> buffer is rendered to or when a HiZ resolve is needed. >> >> For Unigine Tropics, the FPS improvement is 1.32134% +/- 0.161878% (n=13). > > > The code looks correct to me, and the perf improvement is nice. I have > comments below that should improve the maintainability of the affected > codepaths. > > >> --- >> src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 1 + >> src/mesa/drivers/dri/i965/brw_clear.c | 58 >> +++++++++++++++++++++------ >> src/mesa/drivers/dri/i965/brw_draw.c | 16 +++++++- >> src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 37 +++++++++++++++++ >> src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 21 ++++++++++ >> 5 files changed, 119 insertions(+), 14 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp >> b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp >> index 51a3bef..d9ec3e9 100644 >> --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp >> @@ -173,6 +173,7 @@ brw_blorp_blit_miptrees(struct brw_context *brw, >> brw_blorp_exec(brw, ¶ms); >> >> intel_miptree_slice_set_needs_hiz_resolve(dst_mt, dst_level, >> dst_layer); >> + intel_miptree_slice_set_hiz_cleared(dst_mt, dst_level, dst_layer, >> false); > > > If the miptree slice needs a hiz resolve, then the hiz buffer is not > cleared. You captured > this invariant by appending ``intel_miptree_slice_set_hiz_cleared(false)`` > to each occurrence > ``intel_miptree_slice_set_needs_hiz_resolve()``. > > In effect, this patch introduces the requirement that all calls to > ``intel_miptree_slice_set_needs_hiz_resolve()`` > be followed by ``intel_miptree_slice_set_hiz_cleared(false)``. Rather than > introducing an implicit > requirement, ``intel_miptree_slice_set_needs_hiz_resolve()`` should > automatically set ``hiz_cleared = false``. > > >> } >> >> static void >> diff --git a/src/mesa/drivers/dri/i965/brw_clear.c >> b/src/mesa/drivers/dri/i965/brw_clear.c >> index 1cac996..9dfb94a 100644 >> --- a/src/mesa/drivers/dri/i965/brw_clear.c >> +++ b/src/mesa/drivers/dri/i965/brw_clear.c >> @@ -164,34 +164,66 @@ brw_fast_clear_depth(struct gl_context *ctx) >> break; >> } >> >> + unsigned num_layers_cleared = 0; >> + bool clear_all_layers = false; >> + >> /* If we're clearing to a new clear value, then we need to resolve >> any clear >> * flags out of the HiZ buffer into the real depth buffer. >> */ >> if (mt->depth_clear_value != depth_clear_value) { >> intel_miptree_all_slices_resolve_depth(brw, mt); >> mt->depth_clear_value = depth_clear_value; >> - } >> >> - /* From the Sandy Bridge PRM, volume 2 part 1, page 313: >> - * >> - * "If other rendering operations have preceded this clear, a >> - * PIPE_CONTROL with write cache flush enabled and Z-inhibit >> disabled >> - * must be issued before the rectangle primitive used for the >> depth >> - * buffer clear operation. >> - */ >> - intel_batchbuffer_emit_mi_flush(brw); >> + clear_all_layers = true; >> + } >> >> if (fb->NumLayers > 0) { >> assert(fb->NumLayers == >> depth_irb->mt->level[depth_irb->mt_level].depth); >> for (unsigned layer = 0; layer < fb->NumLayers; layer++) { >> - intel_hiz_exec(brw, mt, depth_irb->mt_level, layer, >> - GEN6_HIZ_OP_DEPTH_CLEAR); >> + if (clear_all_layers || >> + !intel_miptree_slice_get_hiz_cleared(mt, >> + depth_irb->mt_level, >> + layer)) { >> + /* From the Sandy Bridge PRM, volume 2 part 1, page 313: >> + * >> + * "If other rendering operations have preceded this >> clear, a >> + * PIPE_CONTROL with write cache flush enabled and >> Z-inhibit >> + * disabled must be issued before the rectangle >> primitive used >> + * for the depth buffer clear operation. >> + */ >> + if (!num_layers_cleared) > > ^^^^^^^^^^^^^^^^^^^^ > This is an integer, not a boolean, and therefore ``num_layers_cleared == 0`` > is more > readable. > > >> + intel_batchbuffer_emit_mi_flush(brw); >> + >> + intel_hiz_exec(brw, mt, depth_irb->mt_level, layer, >> + GEN6_HIZ_OP_DEPTH_CLEAR); >> + >> + intel_miptree_slice_set_hiz_cleared(mt, >> + depth_irb->mt_level, >> + layer, >> + true); >> + num_layers_cleared++; >> + } >> } >> } else { >> - intel_hiz_exec(brw, mt, depth_irb->mt_level, depth_irb->mt_layer, >> - GEN6_HIZ_OP_DEPTH_CLEAR); >> + if (clear_all_layers || >> + !intel_miptree_slice_get_hiz_cleared(mt, >> + depth_irb->mt_level, >> + depth_irb->mt_layer)) { >> + intel_batchbuffer_emit_mi_flush(brw); >> + intel_hiz_exec(brw, mt, depth_irb->mt_level, >> depth_irb->mt_layer, >> + GEN6_HIZ_OP_DEPTH_CLEAR); >> + >> + intel_miptree_slice_set_hiz_cleared(mt, >> + depth_irb->mt_level, >> + depth_irb->mt_layer, >> + true); >> + num_layers_cleared = 1; >> + } >> } >> >> + if (!num_layers_cleared) >> + return true; >> + >> if (brw->gen == 6) { >> /* From the Sandy Bridge PRM, volume 2 part 1, page 314: >> * > > > Several lines down is this: > > intel_renderbuffer_att_set_needs_depth_resolve(depth_att); > > return true; > } // end of function > > The new code added in this patch would be more concise, and more closely > follow the pattern of > existing code, if you removed all the calls to > ``intel_miptree_slice_set_hiz_cleared()`` and > replaced them with a single ``intel_renderbuffer_att_set_hiz_cleared(..., > true)``. > > There is another good use of intel_renderbuffer_att_set_hiz_cleared() in the > below hunk > for brw_postdraw_set_buffers_need_resolve(). Since intel_renderbuffer_att_set_needs_depth_resolve is called when the depth buffer is rendered to or cleared, we can extend it to set/unset the hiz_cleared flag. I've sent out v2 doing that (as well as incorporating all your other comments).
> > >> diff --git a/src/mesa/drivers/dri/i965/brw_draw.c >> b/src/mesa/drivers/dri/i965/brw_draw.c >> index b898cd3..4ebfe44 100644 >> --- a/src/mesa/drivers/dri/i965/brw_draw.c >> +++ b/src/mesa/drivers/dri/i965/brw_draw.c >> @@ -363,8 +363,22 @@ static void >> brw_postdraw_set_buffers_need_resolve(struct brw_context *brw) >> intel_renderbuffer_set_needs_downsample(front_irb); >> if (back_irb) >> intel_renderbuffer_set_needs_downsample(back_irb); >> - if (depth_irb && ctx->Depth.Mask) >> + if (depth_irb && ctx->Depth.Mask) { >> intel_renderbuffer_att_set_needs_depth_resolve(depth_att); >> + >> + if (depth_irb->mt) { >> + if (depth_att->Layered) { >> + intel_miptree_set_all_slices_hiz_cleared(depth_irb->mt, >> + depth_irb->mt_level, >> + false); >> + } else { >> + intel_miptree_slice_set_hiz_cleared(depth_irb->mt, >> + depth_irb->mt_level, >> + depth_irb->mt_layer, >> + false); >> + } >> + } >> + } > > > This code would be easier to read if the new if-tree were replaced with a > single line, ``intel_renderbuffer_att_set_hiz_cleared(...)``. > > >> } >> >> /* May fail if out of video memory for texture or vbo upload, or on > > > >> @@ -2256,6 +2292,7 @@ intel_miptree_map_singlesample(struct brw_context >> *brw, >> intel_miptree_slice_resolve_depth(brw, mt, level, slice); >> if (map->mode & GL_MAP_WRITE_BIT) { >> intel_miptree_slice_set_needs_hiz_resolve(mt, level, slice); >> + intel_miptree_slice_set_hiz_cleared(mt, level, slice, false); > > > Again, ``intel_miptree_set_needs_hiz_resolve()`` should automatically set > ``hiz_cleared = false``. > > >> } >> >> if (mt->format == MESA_FORMAT_S8) { > > -- o...@lunarg.com _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev