On 05/31/2018 04:33 AM, Ian Romanick wrote:
On 05/30/2018 06:35 PM, Ian Romanick wrote:
As is, this does a lot of damage on most Intel platforms.  The Skylake
results are below.  I've looked a couple of the worst-hit shaders, and I
may have a couple small fixes.  I'm also going to try the patch tarceri
sent out earlier.

I'll look into this more tomorrow.  My initial analysis is that
something here causes the loop unroller to miscalculate the number of
loop iterations for some loops in some Plane Shift shaders.  The results
in a bunch of extra flow control and general fail. :(


You probably need "nir: add unsigned comparison simplifications" in your tree, it fixes loop unrolling in Wolfenstein 2.

Ignoring Plane Shift, it looks like all of the other hurt shaders are
only hurt by one or two instructions.  I will look at a couple of those
shaders too.

Let me know if you have an example. Thanks!


total instructions in shared programs: 14371511 -> 14373672 (0.02%)
instructions in affected programs: 236840 -> 239001 (0.91%)
helped: 304
HURT: 555
helped stats (abs) min: 1 max: 27 x̄: 2.04 x̃: 1
helped stats (rel) min: 0.07% max: 6.52% x̄: 1.26% x̃: 0.80%
HURT stats (abs)   min: 1 max: 13 x̄: 5.01 x̃: 1
HURT stats (rel)   min: 0.08% max: 9.32% x̄: 2.68% x̃: 1.23%
95% mean confidence interval for instructions value: 2.17 2.86
95% mean confidence interval for instructions %-change: 1.08% 1.49%
Instructions are HURT.

total cycles in shared programs: 532435927 -> 532451249 (<.01%)
cycles in affected programs: 5088752 -> 5104074 (0.30%)
helped: 330
HURT: 579
helped stats (abs) min: 1 max: 2700 x̄: 107.57 x̃: 30
helped stats (rel) min: <.01% max: 24.11% x̄: 4.42% x̃: 1.58%
HURT stats (abs)   min: 1 max: 336 x̄: 87.77 x̃: 18
HURT stats (rel)   min: <.01% max: 37.47% x̄: 7.34% x̃: 1.69%
95% mean confidence interval for cycles value: 5.36 28.35
95% mean confidence interval for cycles %-change: 2.38% 3.76%
Cycles are HURT.

total spills in shared programs: 8114 -> 8111 (-0.04%)
spills in affected programs: 50 -> 47 (-6.00%)
helped: 1
HURT: 0

total fills in shared programs: 11082 -> 11077 (-0.05%)
fills in affected programs: 68 -> 63 (-7.35%)
helped: 1
HURT: 0


On 05/30/2018 05:21 AM, Samuel Pitoiset wrote:
This pass turns:

    if (cond) {
    } else {
       do_work();
    }

into:

    if (!cond) {
       do_work();
    } else {
    }

Here's the vkpipeline-db stats (from affected shaders) on Polaris10:

Totals from affected shaders:
SGPRS: 17272 -> 17296 (0.14 %)
VGPRS: 18712 -> 18740 (0.15 %)
Spilled SGPRs: 1179 -> 1142 (-3.14 %)
Code Size: 1503364 -> 1515176 (0.79 %) bytes
Max Waves: 916 -> 911 (-0.55 %)

This pass only affects Serious Sam 2017 (Vulkan) on my side. The
stats are not really good for now. Some shaders look quite dumb
but this will be improved with further NIR passes, like ifs
combination.

Cc: Ian Romanick <ian.d.roman...@intel.com>
Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
---
  src/compiler/nir/nir_opt_if.c | 98 +++++++++++++++++++++++++++++++++--
  1 file changed, 93 insertions(+), 5 deletions(-)

diff --git a/src/compiler/nir/nir_opt_if.c b/src/compiler/nir/nir_opt_if.c
index 68dacea770..b11c1852de 100644
--- a/src/compiler/nir/nir_opt_if.c
+++ b/src/compiler/nir/nir_opt_if.c
@@ -22,6 +22,7 @@
   */
#include "nir.h"
+#include "nir/nir_builder.h"
  #include "nir_control_flow.h"
/**
@@ -201,7 +202,90 @@ opt_peel_loop_initial_if(nir_loop *loop)
  }
static bool
-opt_if_cf_list(struct exec_list *cf_list)
+is_block_empty(nir_block *block)
+{
+   return nir_cf_node_is_last(&block->cf_node) &&
+          exec_list_is_empty(&block->instr_list);
+}
+
+/**
+ * This optimization turns:
+ *
+ *     if (cond) {
+ *     } else {
+ *         do_work();
+ *     }
+ *
+ * into:
+ *
+ *     if (!cond) {
+ *         do_work();
+ *     } else {
+ *     }
+ */
+static bool
+opt_if_simplification(nir_builder *b, nir_if *nif)
+{
+   nir_instr *src_instr;
+
+   /* Only simplify if the then block is empty and the else block is not. */
+   if (!is_block_empty(nir_if_first_then_block(nif)) ||
+       is_block_empty(nir_if_first_else_block(nif)))
+      return false;
+
+   /* Make sure the condition is a comparison operation. */
+   src_instr = nif->condition.ssa->parent_instr;
+   if (src_instr->type != nir_instr_type_alu)
+      return false;
+
+   nir_alu_instr *alu_instr = nir_instr_as_alu(src_instr);
+   if (!nir_alu_instr_is_comparison(alu_instr))
+      return false;
+
+   /* Insert the inverted instruction and rewrite the condition. */
+   b->cursor = nir_after_instr(&alu_instr->instr);
+
+   nir_ssa_def *new_condition =
+      nir_inot(b, &alu_instr->dest.dest.ssa);
+
+   nir_if_rewrite_condition(nif, nir_src_for_ssa(new_condition));
+
+   /* Grab pointers to the last then/else blocks for fixing up the phis. */
+   nir_block *then_block = nir_if_last_then_block(nif);
+   nir_block *else_block = nir_if_last_else_block(nif);
+
+   /* Walk all the phis in the block immediately following the if statement and
+    * swap the blocks.
+    */
+   nir_block *after_if_block =
+      nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node));
+
+   nir_foreach_instr(instr, after_if_block) {
+      if (instr->type != nir_instr_type_phi)
+         continue;
+
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+
+      foreach_list_typed(nir_phi_src, src, node, &phi->srcs) {
+         if (src->pred == else_block) {
+            src->pred = then_block;
+         } else if (src->pred == then_block) {
+            src->pred = else_block;
+         }
+      }
+   }
+
+   /* Finally, move the else block to the then block. */
+   nir_cf_list tmp;
+   nir_cf_extract(&tmp, nir_before_cf_list(&nif->else_list),
+                        nir_after_cf_list(&nif->else_list));
+   nir_cf_reinsert(&tmp, nir_before_cf_list(&nif->then_list));
+   nir_cf_delete(&tmp);
+
+   return true;
+}
+static bool
+opt_if_cf_list(nir_builder *b, struct exec_list *cf_list)
  {
     bool progress = false;
     foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
@@ -211,14 +295,15 @@ opt_if_cf_list(struct exec_list *cf_list)
case nir_cf_node_if: {
           nir_if *nif = nir_cf_node_as_if(cf_node);
-         progress |= opt_if_cf_list(&nif->then_list);
-         progress |= opt_if_cf_list(&nif->else_list);
+         progress |= opt_if_cf_list(b, &nif->then_list);
+         progress |= opt_if_cf_list(b, &nif->else_list);
+         progress |= opt_if_simplification(b, nif);
           break;
        }
case nir_cf_node_loop: {
           nir_loop *loop = nir_cf_node_as_loop(cf_node);
-         progress |= opt_if_cf_list(&loop->body);
+         progress |= opt_if_cf_list(b, &loop->body);
           progress |= opt_peel_loop_initial_if(loop);
           break;
        }
@@ -240,7 +325,10 @@ nir_opt_if(nir_shader *shader)
        if (function->impl == NULL)
           continue;
- if (opt_if_cf_list(&function->impl->body)) {
+      nir_builder b;
+      nir_builder_init(&b, function->impl);
+
+      if (opt_if_cf_list(&b, &function->impl->body)) {
           nir_metadata_preserve(function->impl, nir_metadata_none);
/* If that made progress, we're no longer really in SSA form. We


_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to