Mesa (main): r300: small address register load optimization

GitLab Mirror Wed, 03 Jan 2024 23:18:56 -0800

Module: Mesa
Branch: main
Commit: 75e7790ee5230a555d9e58d573131205d613abfe
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=75e7790ee5230a555d9e58d573131205d613abfe


Author: Pavel Ondračka <pavel.ondra...@gmail.com>
Date:   Wed Sep 20 09:21:35 2023 +0200

r300: small address register load optimization

We do ffloor by default for address register loads, so there is no need to
do it explicitly. This needs to happen after int lowering; otherwise we get
ftrunc by default as a bonus. This is mostly for wined3d.

Shader-db RV370:
total instructions in shared programs: 82147 -> 82071 (-0.09%)
instructions in affected programs: 2772 -> 2696 (-2.74%)
helped: 32
HURT: 0
total cycles in shared programs: 128479 -> 128403 (-0.06%)
cycles in affected programs: 2813 -> 2737 (-2.70%)
helped: 32
HURT: 0

Shader-db RV530:
total instructions in shared programs: 126141 -> 126057 (-0.07%)
instructions in affected programs: 3170 -> 3086 (-2.65%)
helped: 36
HURT: 0
total cycles in shared programs: 191688 -> 191604 (-0.04%)
cycles in affected programs: 3222 -> 3138 (-2.61%)
helped: 36
HURT: 0

Reviewed-by: Filip Gawin <filip.ga...@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26816>

---

 src/gallium/drivers/r300/compiler/nir_to_rc.c        |  2 ++
 src/gallium/drivers/r300/compiler/r300_nir.h         | 20 ++++++++++++++++++++
 .../drivers/r300/compiler/r300_nir_algebraic.py      | 12 ++++++++++++
 3 files changed, 34 insertions(+)

diff --git a/src/gallium/drivers/r300/compiler/nir_to_rc.c 
b/src/gallium/drivers/r300/compiler/nir_to_rc.c
index 9c9bfaaa241..18b9f17272d 100644
--- a/src/gallium/drivers/r300/compiler/nir_to_rc.c
+++ b/src/gallium/drivers/r300/compiler/nir_to_rc.c
@@ -2440,6 +2440,8 @@ const void *nir_to_rc_options(struct nir_shader *s,
    }
 
    NIR_PASS_V(s, nir_lower_int_to_float);
+   NIR_PASS_V(s, nir_copy_prop);
+   NIR_PASS_V(s, r300_nir_post_integer_lowering);
    NIR_PASS_V(s, nir_lower_bool_to_float,
               !options->lower_cmp && !options->lower_fabs);
    /* bool_to_float generates MOVs for b2f32 that we want to clean up. */
diff --git a/src/gallium/drivers/r300/compiler/r300_nir.h 
b/src/gallium/drivers/r300/compiler/r300_nir.h
index 916eb08fd48..fcf815f3c41 100644
--- a/src/gallium/drivers/r300/compiler/r300_nir.h
+++ b/src/gallium/drivers/r300/compiler/r300_nir.h
@@ -47,6 +47,24 @@ is_ubo_or_input(UNUSED struct hash_table *ht, const 
nir_alu_instr *instr,
    }
 }
 
+static inline bool
+is_only_used_by_load_ubo_vec4(const nir_alu_instr *instr)
+{
+   nir_foreach_use(src, &instr->def) {
+      if (nir_src_is_if(src))
+         return false;
+      nir_instr *user_instr = nir_src_parent_instr(src);
+      if (user_instr->type != nir_instr_type_intrinsic)
+         return false;
+
+      const nir_intrinsic_instr *const user_intrinsic = 
nir_instr_as_intrinsic(user_instr);
+
+      if (user_intrinsic->intrinsic != nir_intrinsic_load_ubo_vec4)
+            return false;
+   }
+   return true;
+}
+
 char *r300_finalize_nir(struct pipe_screen *pscreen, void *nir);
 
 extern bool r300_transform_vs_trig_input(struct nir_shader *shader);
@@ -61,4 +79,6 @@ extern bool r300_nir_prepare_presubtract(struct nir_shader 
*shader);
 
 extern bool r300_nir_clean_double_fneg(struct nir_shader *shader);
 
+extern bool r300_nir_post_integer_lowering(struct nir_shader *shader);
+
 #endif /* R300_NIR_H */
diff --git a/src/gallium/drivers/r300/compiler/r300_nir_algebraic.py 
b/src/gallium/drivers/r300/compiler/r300_nir_algebraic.py
index f03b8eaf6bc..c01b43beefc 100644
--- a/src/gallium/drivers/r300/compiler/r300_nir_algebraic.py
+++ b/src/gallium/drivers/r300/compiler/r300_nir_algebraic.py
@@ -94,6 +94,15 @@ r300_nir_clean_double_fneg = [
         (('fneg', ('fneg', a)), a)
 ]
 
+r300_nir_post_integer_lowering = [
+        # If ffloor result is used only for indirect constant load, we can get 
rid of it
+        # completely as ntt emits ARL by default which already does the 
flooring.
+        # This actually checks for the lowered ffloor(a) = a - ffract(a) 
patterns.
+        (('fadd(is_only_used_by_load_ubo_vec4)', a, ('fneg', ('ffract', a))), 
a),
+        # This is a D3D9 pattern from Wine when shader wants ffloor instead of 
fround on register load.
+        (('fround_even(is_only_used_by_load_ubo_vec4)', ('fadd', a, ('fneg', 
('ffract', a)))), a)
+]
+
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument('-p', '--import-path', required=True)
@@ -140,5 +149,8 @@ def main():
         f.write(nir_algebraic.AlgebraicPass("r300_nir_clean_double_fneg",
                                             
r300_nir_clean_double_fneg).render())
 
+        f.write(nir_algebraic.AlgebraicPass("r300_nir_post_integer_lowering",
+                                            
r300_nir_post_integer_lowering).render())
+
 if __name__ == '__main__':
     main()

Mesa (main): r300: small address register load optimization

Reply via email to