For now I have only enabled this for RADV. We could also do it for
radeonsi, but we would need to add a CAP for it.

vkpipeline-db results:

Totals from affected shaders:
SGPRS: 4104 -> 3728 (-9.16 %)
VGPRS: 3604 -> 3472 (-3.66 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 186868 -> 186740 (-0.07 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 935 -> 968 (3.53 %)
Wait states: 0 -> 0 (0.00 %)

The VGPR and Max Waves changes are in Rise of the Tomb Raider; there
were also SGPR changes in Nier.
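
To make the patterns the new pass looks for concrete, here is a small
standalone C sketch (not part of the patch) of the component test
described in the code comment below. It assumes all four components of
the varying are compile-time float constants; the actual pass works per
component on scalar store_deref instructions and tracks the constants
with bitmasks instead.

#include <stdbool.h>
#include <stdio.h>

/* Returns true if a vec4 constant matches one of the eliminable
 * patterns: (0,0,0,{0,1}) or (1,1,1,{0,1}).
 */
static bool
is_eliminable_constant_varying(const float v[4])
{
   const bool xyz_zero = v[0] == 0.0f && v[1] == 0.0f && v[2] == 0.0f;
   const bool xyz_one  = v[0] == 1.0f && v[1] == 1.0f && v[2] == 1.0f;

   /* The w component may be either 0.0 or 1.0 in both patterns. */
   return (xyz_zero || xyz_one) && (v[3] == 0.0f || v[3] == 1.0f);
}

int
main(void)
{
   const float a[4] = { 0.0f, 0.0f, 0.0f, 1.0f };  /* matches      */
   const float b[4] = { 1.0f, 0.0f, 0.0f, 1.0f };  /* does not     */

   printf("%d %d\n", is_eliminable_constant_varying(a),
          is_eliminable_constant_varying(b));
   return 0;
}

When such a slot is found, the pass simply marks all four components of
that location as used so the compaction code will not pack other
varyings into it.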
---
 src/amd/vulkan/radv_pipeline.c            |   2 +-
 src/compiler/nir/nir.h                    |   3 +-
 src/compiler/nir/nir_linking_helpers.c    | 111 +++++++++++++++++++++-
 src/mesa/state_tracker/st_glsl_to_nir.cpp |   2 +-
 4 files changed, 114 insertions(+), 4 deletions(-)

diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 8f283365069..99dc54e8ed3 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -1823,7 +1823,7 @@ radv_link_shaders(struct radv_pipeline *pipeline, nir_shader **shaders)
                                                          ordered_shaders[i - 1]);
 
                nir_compact_varyings(ordered_shaders[i],
-                                    ordered_shaders[i - 1], true);
+                                    ordered_shaders[i - 1], true, true);
 
                if (progress) {
                        if (nir_lower_global_vars_to_local(ordered_shaders[i])) {
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 60ea4fbc7ff..e69612c23ae 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -2798,7 +2798,8 @@ bool nir_remove_unused_io_vars(nir_shader *shader, struct exec_list *var_list,
                                uint64_t *used_by_other_stage,
                                uint64_t *used_by_other_stage_patches);
 void nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
-                          bool default_to_smooth_interp);
+                          bool default_to_smooth_interp,
+                          bool ignore_constant_varyings);
 void nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer);
 
 typedef enum {
diff --git a/src/compiler/nir/nir_linking_helpers.c b/src/compiler/nir/nir_linking_helpers.c
index 4498d57ac4a..ab29fcc8a1f 100644
--- a/src/compiler/nir/nir_linking_helpers.c
+++ b/src/compiler/nir/nir_linking_helpers.c
@@ -524,6 +524,111 @@ compact_components(nir_shader *producer, nir_shader *consumer, uint8_t *comps,
                               &producer->info.patch_outputs_read);
 }
 
+/*
+ * On AMD hardware we can eliminate certain constant outputs between the
+ * vertex and fragment shader. Here we avoid packing varyings with the outputs
+ * we can potentially eliminate.
+ *
+ * The four constant combinations we can eliminate are as follows:
+ *
+ *    0.0, 0.0, 0.0, {0.0,1.0}
+ *    1.0, 1.0, 1.0, {0.0,1.0}
+ *
+ * TODO: We could also try repacking varying constants in order to produce
+ * these combinations.
+ */
+static void
+skip_packing_constant_varyings(nir_shader *producer, nir_shader *consumer,
+                               uint8_t *comps)
+{
+   if (consumer->info.stage != MESA_SHADER_FRAGMENT)
+      return;
+
+   if (producer->info.stage != MESA_SHADER_VERTEX &&
+       producer->info.stage != MESA_SHADER_TESS_EVAL)
+      return;
+
+   uint8_t comps_zero[MAX_VARYING] = {0};
+   uint8_t comps_one[MAX_VARYING] = {0};
+   uint32_t skip_locations = 0;
+
+   nir_function_impl *impl = nir_shader_get_entrypoint(producer);
+
+   nir_foreach_block(block, impl) {
+      nir_foreach_instr(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+
+         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+         if (intr->intrinsic != nir_intrinsic_store_deref)
+            continue;
+
+         nir_variable *var =
+            nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
+
+         if (var->data.location < VARYING_SLOT_VAR0 ||
+             var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING) {
+            continue;
+         }
+
+         unsigned location = var->data.location - VARYING_SLOT_VAR0;
+         if (skip_locations & (1 << location)) {
+            continue;
+         }
+
+         /* To keep things simple, only work with varyings we were able to
+          * split previously.
+          */
+         if (intr->num_components != 1) {
+            skip_locations |= 1 << location;
+            continue;
+         }
+
+         if (glsl_get_base_type(var->type) != GLSL_TYPE_FLOAT) {
+            skip_locations |= 1 << location;
+            continue;
+         }
+
+         if (intr->src[1].ssa->parent_instr->type != nir_instr_type_load_const) {
+            skip_locations |= 1 << location;
+            continue;
+         }
+
+         nir_load_const_instr *const_instr =
+            nir_instr_as_load_const(intr->src[1].ssa->parent_instr);
+
+         assert(const_instr->def.bit_size == 32);
+         if (const_instr->value.f32[0] == 0.0) {
+            if (comps_one[location] & 0x7) {
+               skip_locations |= 1 << location;
+               continue;
+            } else {
+               comps_zero[location] |= 1 << var->data.location_frac;
+            }
+         } else if (const_instr->value.f32[0] == 1.0) {
+            if (comps_zero[location] & 0x7) {
+               skip_locations |= 1 << location;
+               continue;
+            } else {
+               comps_one[location] |= 1 << var->data.location_frac;
+            }
+         } else {
+            skip_locations |= 1 << location;
+         }
+      }
+   }
+
+   for (unsigned i = 0; i < MAX_VARYING; i++) {
+      if (skip_locations & 1 << i)
+         continue;
+
+      if (comps_one[i] || comps_zero[i]) {
+         comps[i] = 0xf;
+      }
+   }
+}
+
 /* We assume that this has been called more-or-less directly after
  * remove_unused_varyings.  At this point, all of the varyings that we
  * aren't going to be using have been completely removed and the
@@ -536,7 +641,8 @@ compact_components(nir_shader *producer, nir_shader *consumer, uint8_t *comps,
  */
 void
 nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
-                     bool default_to_smooth_interp)
+                     bool default_to_smooth_interp,
+                     bool ignore_constant_varyings)
 {
    assert(producer->info.stage != MESA_SHADER_FRAGMENT);
    assert(consumer->info.stage != MESA_SHADER_VERTEX);
@@ -545,6 +651,9 @@ nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
    uint8_t interp_type[MAX_VARYINGS_INCL_PATCH] = {0};
    uint8_t interp_loc[MAX_VARYINGS_INCL_PATCH] = {0};
 
+   if (ignore_constant_varyings)
+      skip_packing_constant_varyings(producer, consumer, comps);
+
    get_slot_component_masks_and_interp_types(&producer->outputs, comps,
                                              interp_type, interp_loc,
                                              producer->info.stage,
diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp
index 911284401e0..c6bb1ac1162 100644
--- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
@@ -751,7 +751,7 @@ st_link_nir(struct gl_context *ctx,
           */
         if (!prev_shader->sh.LinkedTransformFeedback)
            nir_compact_varyings(shader_program->_LinkedShaders[prev]->Program->nir,
-                              nir, ctx->API != API_OPENGL_COMPAT);
+                                 nir, ctx->API != API_OPENGL_COMPAT, false);
       }
       prev = i;
    }
-- 
2.17.2
