On 24.04.2017 15:22, Marek Olšák wrote:
From: Marek Olšák <marek.ol...@amd.com>

I don't like it. This kind of app-specific override is what drirc was meant to provide. Having separate places for it is confusing.

Cheers,
Nicolai


---
 src/gallium/drivers/radeonsi/si_pipe.c       | 20 +++++++++++++
 src/gallium/drivers/radeonsi/si_pipe.h       |  1 +
 src/gallium/drivers/radeonsi/si_state_draw.c | 45 ++++++++++++++++++++--------
 3 files changed, 54 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 1a83564..53a8201 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -29,20 +29,29 @@
 #include "radeon/radeon_uvd.h"
 #include "util/u_memory.h"
 #include "util/u_suballoc.h"
 #include "util/u_tests.h"
 #include "vl/vl_decoder.h"
 #include "../ddebug/dd_util.h"

 #define SI_LLVM_DEFAULT_FEATURES \
        "+DumpCode,+vgpr-spilling,-fp32-denormals,-xnack"

+/* DX10/11 apply primitive restart to strip primitive types only. */
+static const char *apps_with_prim_restart_dx_behavior[] = {
+       "DeusExMD",
+       "DirtRally",
+       "HitmanPro",
+       "MadMax",
+       "TotalWarhammer",
+};
+
 /*
  * pipe_context
  */
 static void si_destroy_context(struct pipe_context *context)
 {
        struct si_context *sctx = (struct si_context *)context;
        int i;

        /* Unreference the framebuffer normally to disable related logic
         * properly.
@@ -306,20 +315,31 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
         *
         * The recommended value is 4 per CU at most. Higher numbers don't
         * bring much benefit, but they still occupy chip resources (think
         * async compute). I've seen ~2% performance difference between 4 and 32.
         */
        sctx->scratch_waves = MAX2(32 * sscreen->b.info.num_good_compute_units,
                                   max_threads_per_block / 64);

        sctx->tm = si_create_llvm_target_machine(sscreen);

+       /* Process the app list. */
+       char process_name[128];
+       if (os_get_process_name(process_name, sizeof(process_name))) {
+               for (i = 0; i < ARRAY_SIZE(apps_with_prim_restart_dx_behavior); i++) {
+                       if (strcmp(process_name, apps_with_prim_restart_dx_behavior[i]) == 0) {
+                               sctx->use_prim_restart_dx_behavior = true;
+                               break;
+                       }
+               }
+       }
+
        return &sctx->b.b;
 fail:
        fprintf(stderr, "radeonsi: Failed to create a context.\n");
        si_destroy_context(&sctx->b.b);
        return NULL;
 }

 /*
  * pipe_screen
  */
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index ea61e1e..1edcfbc 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -236,20 +236,21 @@ struct si_context {

        struct radeon_winsys_cs         *ce_ib;
        struct radeon_winsys_cs         *ce_preamble_ib;
        bool                            ce_need_synchronization;
        struct u_suballocator           *ce_suballocator;

        struct si_shader_ctx_state      fixed_func_tcs_shader;
        LLVMTargetMachineRef            tm; /* only non-threaded compilation */
        bool                            gfx_flush_in_progress;
        bool                            compute_is_busy;
+       bool                            use_prim_restart_dx_behavior;

        /* Atoms (direct states). */
        union si_state_atoms            atoms;
        unsigned                        dirty_atoms; /* mask */
        /* PM4 states (precomputed immutable states) */
        unsigned                        dirty_states;
        union si_state                  queued;
        union si_state                  emitted;

        /* Atom declarations. */
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index e6a9ee0..319160e 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -445,42 +445,43 @@ void si_init_ia_multi_vgt_param_table(struct si_context *sctx)
                key.u.tcs_tes_uses_prim_id = tess_uses_primid;
                key.u.uses_gs = uses_gs;

                sctx->ia_multi_vgt_param[key.index] =
                        si_get_init_multi_vgt_param(sctx->screen, &key);
        }
 }

 static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
                                          const struct pipe_draw_info *info,
-                                         unsigned num_patches)
+                                         unsigned num_patches,
+                                         bool primitive_restart)
 {
        union si_vgt_param_key key = sctx->ia_multi_vgt_param_key;
        unsigned primgroup_size;
        unsigned ia_multi_vgt_param;

        if (sctx->tes_shader.cso) {
                primgroup_size = num_patches; /* must be a multiple of NUM_PATCHES */
        } else if (sctx->gs_shader.cso) {
                primgroup_size = 64; /* recommended with a GS */
        } else {
                primgroup_size = 128; /* recommended without a GS and tess */
        }

        key.u.prim = info->mode;
        key.u.uses_instancing = info->indirect || info->instance_count > 1;
        key.u.multi_instances_smaller_than_primgroup =
                info->indirect ||
                (info->instance_count > 1 &&
                 (info->count_from_stream_output ||
                  si_num_prims_for_vertices(info) < primgroup_size));
-       key.u.primitive_restart = info->primitive_restart;
+       key.u.primitive_restart = primitive_restart;
        key.u.count_from_stream_output = info->count_from_stream_output != NULL;

        ia_multi_vgt_param = sctx->ia_multi_vgt_param[key.index] |
                             S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1);

        if (sctx->gs_shader.cso) {
                /* GS requirement. */
                if (sctx->b.chip_class <= VI &&
                    SI_GS_PER_ES / primgroup_size >= sctx->screen->gs_table_depth - 3)
                        ia_multi_vgt_param |= S_028AA8_PARTIAL_ES_WAVE_ON(1);
@@ -544,28 +545,30 @@ static void si_emit_vs_state(struct si_context *sctx,
                        sctx->shader_userdata.sh_base[PIPE_SHADER_VERTEX] +
                        SI_SGPR_VS_STATE_BITS * 4,
                        sctx->current_vs_state);

                sctx->last_vs_state = sctx->current_vs_state;
        }
 }

 static void si_emit_draw_registers(struct si_context *sctx,
                                   const struct pipe_draw_info *info,
-                                  unsigned num_patches)
+                                  unsigned num_patches,
+                                  bool primitive_restart)
 {
        struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
        unsigned prim = si_conv_pipe_prim(info->mode);
        unsigned gs_out_prim = si_conv_prim_to_gs_out(sctx->current_rast_prim);
        unsigned ia_multi_vgt_param;

-       ia_multi_vgt_param = si_get_ia_multi_vgt_param(sctx, info, num_patches);
+       ia_multi_vgt_param = si_get_ia_multi_vgt_param(sctx, info, num_patches,
+                                                      primitive_restart);

        /* Draw state. */
        if (ia_multi_vgt_param != sctx->last_multi_vgt_param) {
                if (sctx->b.chip_class >= GFX9)
                        radeon_set_uconfig_reg_idx(cs, R_030960_IA_MULTI_VGT_PARAM, 4, ia_multi_vgt_param);
                else if (sctx->b.chip_class >= CIK)
                        radeon_set_context_reg_idx(cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param);
                else
                        radeon_set_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);

@@ -579,32 +582,32 @@ static void si_emit_draw_registers(struct si_context *sctx,

                sctx->last_prim = prim;
        }

        if (gs_out_prim != sctx->last_gs_out_prim) {
                radeon_set_context_reg(cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out_prim);
                sctx->last_gs_out_prim = gs_out_prim;
        }

        /* Primitive restart. */
-       if (info->primitive_restart != sctx->last_primitive_restart_en) {
+       if (primitive_restart != sctx->last_primitive_restart_en) {
                if (sctx->b.chip_class >= GFX9)
                        radeon_set_uconfig_reg(cs, R_03092C_VGT_MULTI_PRIM_IB_RESET_EN,
-                                              info->primitive_restart);
+                                              primitive_restart);
                else
                        radeon_set_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN,
-                                              info->primitive_restart);
+                                              primitive_restart);

-               sctx->last_primitive_restart_en = info->primitive_restart;
+               sctx->last_primitive_restart_en = primitive_restart;

        }
-       if (info->primitive_restart &&
+       if (primitive_restart &&
            (info->restart_index != sctx->last_restart_index ||
             sctx->last_restart_index == SI_RESTART_INDEX_UNKNOWN)) {
                radeon_set_context_reg(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
                                       info->restart_index);
                sctx->last_restart_index = info->restart_index;
        }
 }

 static void si_emit_draw_packets(struct si_context *sctx,
                                 const struct pipe_draw_info *info,
@@ -1129,29 +1132,47 @@ void si_ce_pre_draw_synchronization(struct si_context *sctx)
 void si_ce_post_draw_synchronization(struct si_context *sctx)
 {
        if (sctx->ce_need_synchronization) {
                radeon_emit(sctx->b.gfx.cs, PKT3(PKT3_INCREMENT_DE_COUNTER, 0, 0));
                radeon_emit(sctx->b.gfx.cs, 0);

                sctx->ce_need_synchronization = false;
        }
 }

+static bool is_strip_primitive_mode(unsigned prim)
+{
+       return ((1 << prim) &
+               ((1 << PIPE_PRIM_LINE_STRIP) |
+                (1 << PIPE_PRIM_LINE_STRIP_ADJACENCY) |
+                (1 << PIPE_PRIM_QUAD_STRIP) |
+                (1 << PIPE_PRIM_TRIANGLE_STRIP) |
+                (1 << PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY))) != 0;
+}
+
 void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 {
        struct si_context *sctx = (struct si_context *)ctx;
        struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
        const struct pipe_index_buffer *ib = &sctx->index_buffer;
        struct pipe_index_buffer ib_tmp; /* for index buffer uploads only */
        unsigned mask, dirty_tex_counter;
        enum pipe_prim_type rast_prim;
        unsigned num_patches = 0;
+       bool primitive_restart = info->indexed && info->primitive_restart;
+
+       /* This is better for performance, but the difference might not be
+        * measurable.
+        */
+       if (sctx->use_prim_restart_dx_behavior &&
+           !is_strip_primitive_mode(info->mode))
+               primitive_restart = false;

        if (likely(!info->indirect)) {
                /* SI-CI treat instance_count==0 as instance_count==1. There is
                 * no workaround for indirect draws, but we can at least skip
                 * direct draws.
                 */
                if (unlikely(!info->instance_count))
                        return;

                /* Handle count == 0. */
@@ -1207,21 +1228,21 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
        if (sctx->gs_shader.cso) {
                /* Determine whether the GS triangle strip adjacency fix should
                 * be applied. Rotate every other triangle if
                 * - triangle strips with adjacency are fed to the GS and
                 * - primitive restart is disabled (the rotation doesn't help
                 *   when the restart occurs after an odd number of triangles).
                 */
                bool gs_tri_strip_adj_fix =
                        !sctx->tes_shader.cso &&
                        info->mode == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY &&
-                       !info->primitive_restart;
+                       !primitive_restart;

                if (gs_tri_strip_adj_fix != sctx->gs_tri_strip_adj_fix) {
                        sctx->gs_tri_strip_adj_fix = gs_tri_strip_adj_fix;
                        sctx->do_update_shaders = true;
                }
        }

        if (sctx->do_update_shaders && !si_update_shaders(sctx))
                return;

@@ -1338,21 +1359,21 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)

                si_pm4_emit(sctx, state);
                sctx->emitted.array[i] = state;
        }
        sctx->dirty_states = 0;

        si_emit_rasterizer_prim_state(sctx);
        if (sctx->tes_shader.cso)
                si_emit_derived_tess_state(sctx, info, &num_patches);
        si_emit_vs_state(sctx, info);
-       si_emit_draw_registers(sctx, info, num_patches);
+       si_emit_draw_registers(sctx, info, num_patches, primitive_restart);

        si_ce_pre_draw_synchronization(sctx);
        si_emit_draw_packets(sctx, info, ib);
        si_ce_post_draw_synchronization(sctx);

        if (sctx->trace_buf)
                si_trace_emit(sctx);

        /* Workaround for a VGT hang when streamout is enabled.
         * It must be done after drawing. */
@@ -1389,21 +1410,21 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
                                        rtex->dirty_level_mask |= 1 << surf->u.tex.level;
                                if (rtex->dcc_gather_statistics)
                                        rtex->separate_dcc_dirty = true;
                        } while (mask);
                }
                sctx->framebuffer.do_update_surf_dirtiness = false;
        }

        pipe_resource_reference(&ib_tmp.buffer, NULL);
        sctx->b.num_draw_calls++;
-       if (info->primitive_restart)
+       if (primitive_restart)
                sctx->b.num_prim_restart_calls++;
        if (G_0286E8_WAVESIZE(sctx->spi_tmpring_size))
                sctx->b.num_spill_draw_calls++;
 }

 void si_trace_emit(struct si_context *sctx)
 {
        struct radeon_winsys_cs *cs = sctx->b.gfx.cs;

        sctx->trace_id++;



--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to