From: Nicolai Hähnle <nicolai.haeh...@amd.com>

This option enables a performance optimization where typical non-blending
draws with a depth buffer may be rasterized out-of-order (on VI+, multi-SE
chips).

This optimization can lead to incorrect results when an application
renders multiple objects with the same Z value at the same pixel, so we
will never enable it by default. But there may be applications that could
benefit from white-listing.
---
 src/gallium/drivers/radeonsi/driinfo_radeonsi.h | 1 +
 src/gallium/drivers/radeonsi/si_pipe.c          | 2 ++
 src/gallium/drivers/radeonsi/si_pipe.h          | 1 +
 src/gallium/drivers/radeonsi/si_state.c         | 8 ++++----
 src/util/xmlpool/t_options.h                    | 5 +++++
 5 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/driinfo_radeonsi.h 
b/src/gallium/drivers/radeonsi/driinfo_radeonsi.h
index af6284a7787..8be85289a0c 100644
--- a/src/gallium/drivers/radeonsi/driinfo_radeonsi.h
+++ b/src/gallium/drivers/radeonsi/driinfo_radeonsi.h
@@ -1,4 +1,5 @@
 // DriConf options specific to radeonsi
 DRI_CONF_SECTION_PERFORMANCE
     DRI_CONF_RADEONSI_ENABLE_SISCHED("false")
+    DRI_CONF_RADEONSI_ASSUME_NO_Z_FIGHTS("false")
 DRI_CONF_SECTION_END
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 9f3651f2526..b4972be739c 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -1041,20 +1041,22 @@ struct pipe_screen *radeonsi_screen_create(struct 
radeon_winsys *ws,
                 sscreen->b.info.pfp_fw_version >= 211 &&
                 sscreen->b.info.me_fw_version >= 173) ||
                (sscreen->b.chip_class == SI &&
                 sscreen->b.info.pfp_fw_version >= 79 &&
                 sscreen->b.info.me_fw_version >= 142);
 
        sscreen->has_ds_bpermute = sscreen->b.chip_class >= VI;
        sscreen->has_out_of_order_rast = sscreen->b.chip_class >= VI &&
                                         sscreen->b.info.max_se >= 2 &&
                                         !(sscreen->b.debug_flags & 
DBG_NO_OUT_OF_ORDER);
+       sscreen->assume_no_z_fights =
+               driQueryOptionb(config->options, "radeonsi_assume_no_z_fights");
        sscreen->has_msaa_sample_loc_bug = (sscreen->b.family >= CHIP_POLARIS10 
&&
                                            sscreen->b.family <= 
CHIP_POLARIS12) ||
                                           sscreen->b.family == CHIP_VEGA10 ||
                                           sscreen->b.family == CHIP_RAVEN;
        sscreen->dpbb_allowed = sscreen->b.chip_class >= GFX9 &&
                                !(sscreen->b.debug_flags & DBG_NO_DPBB);
        sscreen->dfsm_allowed = sscreen->dpbb_allowed &&
                                !(sscreen->b.debug_flags & DBG_NO_DFSM);
 
        /* While it would be nice not to have this flag, we are constrained
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index b8073ce9c09..d200c9f571f 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -89,20 +89,21 @@ struct u_suballocator;
 
 struct si_screen {
        struct r600_common_screen       b;
        unsigned                        gs_table_depth;
        unsigned                        tess_offchip_block_dw_size;
        bool                            has_clear_state;
        bool                            has_distributed_tess;
        bool                            has_draw_indirect_multi;
        bool                            has_ds_bpermute;
        bool                            has_out_of_order_rast;
+       bool                            assume_no_z_fights;
        bool                            has_msaa_sample_loc_bug;
        bool                            dpbb_allowed;
        bool                            dfsm_allowed;
        bool                            llvm_has_working_vgpr_indexing;
 
        /* Whether shaders are monolithic (1-part) or separate (3-part). */
        bool                            use_monolithic_shaders;
        bool                            record_llvm_ir;
 
        mtx_t                   shader_parts_mutex;
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 06f86aaf92a..a8af5752771 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1087,20 +1087,21 @@ static bool si_order_invariant_stencil_state(const 
struct pipe_stencil_state *st
               (state->func == PIPE_FUNC_ALWAYS &&
                si_order_invariant_stencil_op(state->zpass_op) &&
                si_order_invariant_stencil_op(state->zfail_op)) ||
               (state->func == PIPE_FUNC_NEVER &&
                si_order_invariant_stencil_op(state->fail_op));
 }
 
 static void *si_create_dsa_state(struct pipe_context *ctx,
                                 const struct pipe_depth_stencil_alpha_state 
*state)
 {
+       struct si_context *sctx = (struct si_context *)ctx;
        struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa);
        struct si_pm4_state *pm4 = &dsa->pm4;
        unsigned db_depth_control;
        uint32_t db_stencil_control = 0;
 
        if (!dsa) {
                return NULL;
        }
 
        dsa->stencil_ref.valuemask[0] = state->stencil[0].valuemask;
@@ -1179,27 +1180,26 @@ static void *si_create_dsa_state(struct pipe_context 
*ctx,
        dsa->order_invariance[1].pass_set =
                nozwrite_and_order_invariant_stencil ||
                (!dsa->stencil_write_enabled &&
                 (state->depth.func == PIPE_FUNC_ALWAYS ||
                  state->depth.func == PIPE_FUNC_NEVER));
        dsa->order_invariance[0].pass_set =
                !dsa->depth_write_enabled ||
                (state->depth.func == PIPE_FUNC_ALWAYS ||
                 state->depth.func == PIPE_FUNC_NEVER);
 
-       const bool assume_no_z_fights = false;
-
        dsa->order_invariance[1].pass_last =
-               assume_no_z_fights && !dsa->stencil_write_enabled &&
+               sctx->screen->assume_no_z_fights &&
+               !dsa->stencil_write_enabled &&
                dsa->depth_write_enabled && zfunc_is_ordered;
        dsa->order_invariance[0].pass_last =
-               assume_no_z_fights &&
+               sctx->screen->assume_no_z_fights &&
                dsa->depth_write_enabled && zfunc_is_ordered;
 
        return dsa;
 }
 
 static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
 {
         struct si_context *sctx = (struct si_context *)ctx;
        struct si_state_dsa *old_dsa = sctx->queued.named.dsa;
         struct si_state_dsa *dsa = state;
diff --git a/src/util/xmlpool/t_options.h b/src/util/xmlpool/t_options.h
index d3f31fc94b1..c92215183a5 100644
--- a/src/util/xmlpool/t_options.h
+++ b/src/util/xmlpool/t_options.h
@@ -431,10 +431,15 @@ DRI_CONF_OPT_BEGIN(csmt_force, int, def) \
 DRI_CONF_OPT_END
 
 /**
  * \brief radeonsi specific configuration options
  */
 
 #define DRI_CONF_RADEONSI_ENABLE_SISCHED(def) \
 DRI_CONF_OPT_BEGIN_B(radeonsi_enable_sisched, def) \
         DRI_CONF_DESC(en,gettext("Use the LLVM sisched option for shader 
compiles")) \
 DRI_CONF_OPT_END
+
+#define DRI_CONF_RADEONSI_ASSUME_NO_Z_FIGHTS(def) \
+DRI_CONF_OPT_BEGIN_B(radeonsi_assume_no_z_fights, def) \
+        DRI_CONF_DESC(en,gettext("Assume no Z fights (enables aggressive 
out-of-order rasterization to improve performance; may cause rendering 
errors)")) \
+DRI_CONF_OPT_END
-- 
2.11.0

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to