Mesa (main): r300: nir fcsel/CMP lowering pass for R500

GitLab Mirror Wed, 03 Jan 2024 23:18:59 -0800

Module: Mesa
Branch: main
Commit: 024491f60fdc6747b33de63fb2bef9e18267e9a9
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=024491f60fdc6747b33de63fb2bef9e18267e9a9


Author: Pavel Ondračka <pavel.ondra...@gmail.com>
Date:   Thu Sep 21 11:59:07 2023 +0200

r300: nir fcsel/CMP lowering pass for R500

Right now this is done in the backend so move it up to NIR. Doing this
in the backend is easier, as at that time we can have a better idea
about when we hit the hardware limits of three different TMP sources,
however moving this to NIR allows for some optimizations. Specifically,
at this time if we decide we actually have to lower we still have the
info if we have plain fcsel for which we can save the comparison and
emit flrp only. During translation to TGSI all of fcsel, fcsel_gt, and
fcsel_ge translate to CMP so at that point the comparison is always needed.

Shader-db RV530:
total instructions in shared programs: 126057 -> 125823 (-0.19%)
instructions in affected programs: 11359 -> 11125 (-2.06%)
helped: 68
HURT: 12
total temps in shared programs: 17043 -> 17023 (-0.12%)
temps in affected programs: 459 -> 439 (-4.36%)
helped: 32
HURT: 12
total cycles in shared programs: 191604 -> 191294 (-0.16%)
cycles in affected programs: 11834 -> 11524 (-2.62%)
helped: 68
HURT: 12

The hurt shaders are some GTK shaders where there is some bad
interaction with nir_move_vec_src_uses_to_dest. This is known and might
be improved later by thweking the pass more.

Reviewed-by: Filip Gawin <filip.ga...@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26816>

---

 src/gallium/drivers/r300/compiler/nir_to_rc.c      |   6 +
 src/gallium/drivers/r300/compiler/r300_nir.h       |   2 +
 .../drivers/r300/compiler/r500_nir_lower_fcsel.c   | 131 +++++++++++++++++++++
 src/gallium/drivers/r300/meson.build               |   1 +
 4 files changed, 140 insertions(+)

diff --git a/src/gallium/drivers/r300/compiler/nir_to_rc.c 
b/src/gallium/drivers/r300/compiler/nir_to_rc.c
index 18b9f17272d..a54cd0c2f0d 100644
--- a/src/gallium/drivers/r300/compiler/nir_to_rc.c
+++ b/src/gallium/drivers/r300/compiler/nir_to_rc.c
@@ -27,6 +27,7 @@
 #include "compiler/nir/nir_worklist.h"
 #include "nir_to_rc.h"
 #include "r300_nir.h"
+#include "r300_screen.h"
 #include "pipe/p_screen.h"
 #include "pipe/p_state.h"
 #include "tgsi/tgsi_dump.h"
@@ -2372,6 +2373,7 @@ const void *nir_to_rc_options(struct nir_shader *s,
 {
    struct ntr_compile *c;
    const void *tgsi_tokens;
+   bool is_r500 = r300_screen(screen)->caps.is_r500;
    nir_variable_mode no_indirects_mask = ntr_no_indirects_mask(s, screen);
 
    /* Lower array indexing on FS inputs.  Since we don't set
@@ -2446,6 +2448,10 @@ const void *nir_to_rc_options(struct nir_shader *s,
               !options->lower_cmp && !options->lower_fabs);
    /* bool_to_float generates MOVs for b2f32 that we want to clean up. */
    NIR_PASS_V(s, nir_copy_prop);
+   if (s->info.stage == MESA_SHADER_VERTEX) {
+      if (is_r500)
+         NIR_PASS_V(s, r300_nir_lower_fcsel_r500);
+   }
    NIR_PASS_V(s, nir_opt_dce);
 
    nir_move_options move_all =
diff --git a/src/gallium/drivers/r300/compiler/r300_nir.h 
b/src/gallium/drivers/r300/compiler/r300_nir.h
index fcf815f3c41..b546a35077d 100644
--- a/src/gallium/drivers/r300/compiler/r300_nir.h
+++ b/src/gallium/drivers/r300/compiler/r300_nir.h
@@ -81,4 +81,6 @@ extern bool r300_nir_clean_double_fneg(struct nir_shader 
*shader);
 
 extern bool r300_nir_post_integer_lowering(struct nir_shader *shader);
 
+extern bool r300_nir_lower_fcsel_r500(nir_shader *shader);
+
 #endif /* R300_NIR_H */
diff --git a/src/gallium/drivers/r300/compiler/r500_nir_lower_fcsel.c 
b/src/gallium/drivers/r300/compiler/r500_nir_lower_fcsel.c
new file mode 100644
index 00000000000..ac280870850
--- /dev/null
+++ b/src/gallium/drivers/r300/compiler/r500_nir_lower_fcsel.c
@@ -0,0 +1,131 @@
+#include <stdbool.h>
+#include "r300_nir.h"
+#include "nir_builder.h"
+
+static int
+follow_modifiers(nir_instr *instr)
+{
+   /* We don't have texturing so the only other options besides alus are
+    * just load input, load ubo or phi. We can copy propagate the first two
+    * in most cases. The cases when the copy propagate is not guaranteed
+    * to work is with indirect ubo load and in the presence of control flow.
+    * So just be safe and count this as a separate tmp.
+    */
+   if (instr->type == nir_instr_type_intrinsic) {
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      /* It should be enough to check if any of the uses is in phi. */
+      if (intrin->intrinsic == nir_intrinsic_load_ubo_vec4 ||
+          intrin->intrinsic == nir_intrinsic_load_constant ||
+          intrin->intrinsic == nir_intrinsic_load_input) {
+          nir_foreach_use(use, &intrin->def) {
+              if (nir_src_parent_instr(use)->type == nir_instr_type_phi)
+                 return intrin->def.index;
+          }
+      }
+      if (intrin->intrinsic == nir_intrinsic_load_ubo_vec4 &&
+          !nir_src_is_const(intrin->src[1]))
+      return intrin->def.index;
+   }
+   /* Assume the worst when we see a phi. */
+   if (instr->type == nir_instr_type_phi)
+      return nir_instr_as_phi(instr)->def.index;
+
+   if (instr->type != nir_instr_type_alu)
+      return -1;
+
+   nir_alu_instr *alu = nir_instr_as_alu(instr);
+
+   if (alu->op == nir_op_fneg || alu->op == nir_op_fabs) {
+      return follow_modifiers(alu->src[0].src.ssa->parent_instr);
+   }
+   return alu->def.index;
+}
+
+static bool
+has_three_different_tmp_sources(nir_alu_instr *fcsel)
+{
+   unsigned src_def_index[3];
+   for (unsigned i = 0; i < 3; i++) {
+      int index = follow_modifiers(fcsel->src[i].src.ssa->parent_instr);
+      if (index == -1)
+         return false;
+      else
+        src_def_index[i] = index;
+   }
+   return src_def_index[0] != src_def_index[1] &&
+          src_def_index[0] != src_def_index[2] &&
+          src_def_index[1] != src_def_index[2];
+}
+
+static bool
+is_comparison(nir_instr *instr)
+{
+   if (instr->type != nir_instr_type_alu)
+      return false;
+
+   nir_alu_instr *alu = nir_instr_as_alu(instr);
+
+   switch (alu->op) {
+   case nir_op_sge:
+   case nir_op_slt:
+   case nir_op_seq:
+   case nir_op_sne:
+      return true;
+   default:
+      return false;
+   }
+}
+
+static bool
+r300_nir_lower_fcsel_instr(nir_builder *b, nir_instr *instr, void *data)
+{
+   if (instr->type != nir_instr_type_alu)
+      return false;
+
+   nir_alu_instr *alu = nir_instr_as_alu(instr);
+
+   if (alu->op != nir_op_fcsel && alu->op != nir_op_fcsel_ge && alu->op != 
nir_op_fcsel_gt)
+      return false;
+
+   if (has_three_different_tmp_sources(alu)) {
+      nir_def *lrp;
+      b->cursor = nir_before_instr(&alu->instr);
+      /* Lower to LRP.
+       * At this point there are no fcsels as all bcsels were converted to
+       * fcsel_gt by nir_lower_bool_to_float, however we can save on the slt
+       * even for nir_op_fcsel_gt if the source is 0 or 1 anyway.
+       */
+      nir_instr *src0_instr = alu->src[0].src.ssa->parent_instr;
+      if (alu->op == nir_op_fcsel ||
+          (alu->op == nir_op_fcsel_gt && is_comparison(src0_instr))) {
+         lrp = nir_flrp(b, nir_ssa_for_alu_src(b, alu, 2),
+                        nir_ssa_for_alu_src(b, alu, 1),
+                        nir_ssa_for_alu_src(b, alu, 0));
+      } else if (alu->op == nir_op_fcsel_ge) {
+         nir_def *sge = nir_sge(b, nir_ssa_for_alu_src(b, alu, 0), 
nir_imm_float(b, 0.0));
+         lrp = nir_flrp(b, nir_ssa_for_alu_src(b, alu, 2),
+                        nir_ssa_for_alu_src(b, alu, 1), sge);
+      } else {
+         nir_def *slt = nir_slt(b, nir_fneg(b, nir_ssa_for_alu_src(b, alu, 0)),
+                                nir_imm_float(b, 0.0));
+         lrp = nir_flrp(b, nir_ssa_for_alu_src(b, alu, 2),
+                        nir_ssa_for_alu_src(b, alu, 1), slt);
+      }
+
+      nir_def_rewrite_uses(&alu->def, lrp);
+      nir_instr_remove(&alu->instr);
+      return true;
+   }
+   return false;
+}
+
+bool
+r300_nir_lower_fcsel_r500(nir_shader *shader)
+{
+   bool progress = nir_shader_instructions_pass(shader,
+                                                r300_nir_lower_fcsel_instr,
+                                                nir_metadata_block_index |
+                                                   nir_metadata_dominance,
+                                                NULL);
+   return progress;
+}
diff --git a/src/gallium/drivers/r300/meson.build 
b/src/gallium/drivers/r300/meson.build
index ec675f4ade8..e47aea43e2e 100644
--- a/src/gallium/drivers/r300/meson.build
+++ b/src/gallium/drivers/r300/meson.build
@@ -77,6 +77,7 @@ files_r300 = files(
   'compiler/r500_fragprog.h',
   'compiler/r300_nir.c',
   'compiler/r300_nir.h',
+  'compiler/r500_nir_lower_fcsel.c',
   'compiler/radeon_code.c',
   'compiler/radeon_code.h',
   'compiler/radeon_compiler.c',

Mesa (main): r300: nir fcsel/CMP lowering pass for R500

Reply via email to