On 15/12/18 7:32 am, Marek Olšák wrote:
For patches 1-3:

Reviewed-by: Marek Olšák <marek.ol...@amd.com <mailto:marek.ol...@amd.com>>


Thanks!

I'm not so knowledgeable to be able to comment on patch 4.

Does it also merge varyings such as (x,y,undef,undef) and (x,undef,undef,w)? There is a game which outputs (x,y,z,undef) and (x,y,undef,undef) where the vertex shader is a SSO.

No. All of the link-time optimisations in NIR are currently only applied at actual link-time. I've often thought about adding a way to apply these to SSO shaders but this is unlikely something I will end up working on since our main focus these days is on Vulkan and there we always link the entire pipeline so its not an issue.


Marek

On Mon, Dec 10, 2018 at 11:28 PM Timothy Arceri <tarc...@itsqueeze.com <mailto:tarc...@itsqueeze.com>> wrote:

    If we are outputting the same value to more than one output
    component rewrite the inputs to read from a single component.

    This will allow the duplicate varying components to be optimised
    away by the existing opts.

    shader-db results i965 (SKL):

    total instructions in shared programs: 12869230 -> 12860886 (-0.06%)
    instructions in affected programs: 322601 -> 314257 (-2.59%)
    helped: 3080
    HURT: 8

    total cycles in shared programs: 317792574 -> 317730593 (-0.02%)
    cycles in affected programs: 2584925 -> 2522944 (-2.40%)
    helped: 2975
    HURT: 477

    shader-db results radeonsi (VEGA):

    Totals from affected shaders:
    SGPRS: 30960 -> 31056 (0.31 %)
    VGPRS: 17052 -> 16672 (-2.23 %)
    Spilled SGPRs: 184 -> 167 (-9.24 %)
    Spilled VGPRs: 0 -> 0 (0.00 %)
    Private memory VGPRs: 0 -> 0 (0.00 %)
    Scratch size: 0 -> 0 (0.00 %) dwords per thread
    Code Size: 562532 -> 549404 (-2.33 %) bytes
    LDS: 0 -> 0 (0.00 %) blocks
    Max Waves: 6011 -> 6110 (1.65 %)
    Wait states: 0 -> 0 (0.00 %)

    vkpipeline-db results RADV (VEGA):

    Totals from affected shaders:
    SGPRS: 14880 -> 15080 (1.34 %)
    VGPRS: 10872 -> 10888 (0.15 %)
    Spilled SGPRs: 0 -> 0 (0.00 %)
    Spilled VGPRs: 0 -> 0 (0.00 %)
    Private memory VGPRs: 0 -> 0 (0.00 %)
    Scratch size: 0 -> 0 (0.00 %) dwords per thread
    Code Size: 674016 -> 668396 (-0.83 %) bytes
    LDS: 0 -> 0 (0.00 %) blocks
    Max Waves: 2708 -> 2704 (-0.15 %)
    Wait states: 0 -> 0 (0.00 %
    ---
      src/compiler/nir/nir_linking_helpers.c | 95 ++++++++++++++++++++++++++
      1 file changed, 95 insertions(+)

    diff --git a/src/compiler/nir/nir_linking_helpers.c
    b/src/compiler/nir/nir_linking_helpers.c
    index 37644d339f..bdfa7b8c4d 100644
    --- a/src/compiler/nir/nir_linking_helpers.c
    +++ b/src/compiler/nir/nir_linking_helpers.c
    @@ -700,6 +700,27 @@ nir_link_xfb_varyings(nir_shader *producer,
    nir_shader *consumer)
         }
      }

    +static nir_variable *
    +get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
    +{
    +   nir_variable *input_var = NULL;
    +   nir_foreach_variable(var, &consumer->inputs) {
    +      if (var->data.location >= VARYING_SLOT_VAR0 &&
    +          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
    +
    +         if (var->data.location == out_var->data.location &&
    +             var->data.location_frac == out_var->data.location_frac &&
    +             var->data.interpolation == out_var->data.interpolation &&
    +             get_interp_loc(var) == get_interp_loc(out_var)) {
    +            input_var = var;
    +            break;
    +         }
    +      }
    +   }
    +
    +   return input_var;
    +}
    +
      static bool
      can_replace_varying(nir_variable *out_var, nir_intrinsic_instr
    *store_intr)
      {
    @@ -782,6 +803,57 @@ try_replace_constant_input(nir_shader *shader,
         return progress;
      }

    +static bool
    +try_replace_duplicate_input(nir_shader *shader, nir_variable
    *input_var,
    +                            nir_intrinsic_instr *dup_store_intr)
    +{
    +   assert(input_var);
    +
    +   nir_variable *dup_out_var =
+ nir_deref_instr_get_variable(nir_src_as_deref(dup_store_intr->src[0]));
    +
    +   if (!can_replace_varying(dup_out_var, dup_store_intr))
    +      return false;
    +
    +   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
    +
    +   nir_builder b;
    +   nir_builder_init(&b, impl);
    +
    +   bool progress = false;
    +   nir_foreach_block(block, impl) {
    +      nir_foreach_instr(instr, block) {
    +         if (instr->type != nir_instr_type_intrinsic)
    +            continue;
    +
    +         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
    +         if (intr->intrinsic != nir_intrinsic_load_deref)
    +            continue;
    +
    +         nir_variable *in_var =
+ nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
    +
    +         if (in_var->data.mode != nir_var_shader_in)
    +            continue;
    +
    +         if (in_var->data.location != dup_out_var->data.location ||
    +             in_var->data.location_frac !=
    dup_out_var->data.location_frac ||
    +             in_var->data.interpolation !=
    input_var->data.interpolation ||
    +             get_interp_loc(in_var) != get_interp_loc(input_var))
    +            continue;
    +
    +         b.cursor = nir_before_instr(instr);
    +
    +         nir_ssa_def *load = nir_load_var(&b, input_var);
    +         nir_ssa_def_rewrite_uses(&intr->dest.ssa,
    nir_src_for_ssa(load));
    +
    +         progress = true;
    +      }
    +   }
    +
    +   return progress;
    +}
    +
      bool
      nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
      {
    @@ -795,6 +867,10 @@ nir_link_opt_varyings(nir_shader *producer,
    nir_shader *consumer)

         nir_function_impl *impl = nir_shader_get_entrypoint(producer);

    +   struct hash_table *varying_values =
    +      _mesa_hash_table_create(NULL,  _mesa_hash_pointer,
    +                              _mesa_key_pointer_equal);
    +
         /* If we find a store in the last block of the producer we can
    be sure this
          * is the only possible value for this output.
          */
    @@ -809,11 +885,30 @@ nir_link_opt_varyings(nir_shader *producer,
    nir_shader *consumer)
               continue;

            if (intr->src[1].ssa->parent_instr->type !=
    nir_instr_type_load_const) {
    +         struct hash_entry *entry =
    +               _mesa_hash_table_search(varying_values,
    intr->src[1].ssa);
    +         if (entry) {
    +            progress |=
    +               try_replace_duplicate_input(consumer,
    +                                           (nir_variable *)
    entry->data,
    +                                           intr);
    +         } else {
    +            nir_variable *out_var =
+  nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
    +            nir_variable *in_var = get_matching_input_var(consumer,
    out_var);
    +            if (in_var && can_replace_varying(out_var, intr)) {
    +               _mesa_hash_table_insert(varying_values,
    intr->src[1].ssa,
    +                                       in_var);
    +            }
    +         }
    +
               continue;
            }

            progress |= try_replace_constant_input(consumer, intr);
         }

    +   _mesa_hash_table_destroy(varying_values, NULL);
    +
         return progress;
      }
-- 2.19.2

    _______________________________________________
    mesa-dev mailing list
    mesa-dev@lists.freedesktop.org <mailto:mesa-dev@lists.freedesktop.org>
    https://lists.freedesktop.org/mailman/listinfo/mesa-dev

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to