Module: Mesa
Branch: main
Commit: 125741dbae82dea2faf8e594c877701e137e0141
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=125741dbae82dea2faf8e594c877701e137e0141

Author: Georg Lehmann <[email protected]>
Date:   Tue Oct  4 16:02:28 2022 +0200

nir/opt_algebraic: Optimize various find_msb_rev patterns.

From dxvk, dxil-spirv, fxc, dxc and others.

Totals from 177 (0.13% of 134913) affected shaders:
CodeSize: 1079504 -> 1059872 (-1.82%)
Instrs: 195381 -> 192269 (-1.59%)
Latency: 3664137 -> 3631951 (-0.88%)
InvThroughput: 599479 -> 585675 (-2.30%)

Signed-off-by: Georg Lehmann <[email protected]>
Reviewed-by: Rhys Perry <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18951>

---

 src/amd/vulkan/radv_shader.c          |  1 +
 src/compiler/nir/nir.h                |  3 +++
 src/compiler/nir/nir_opt_algebraic.py | 29 +++++++++++++++++++++++++++++
 3 files changed, 33 insertions(+)

diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 81daff18988..f4bd677e454 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -97,6 +97,7 @@ get_nir_options_for_stage(struct radv_physical_device *device, gl_shader_stage s
       .has_sdot_4x8 = device->rad_info.has_accelerated_dot_product,
       .has_udot_4x8 = device->rad_info.has_accelerated_dot_product,
       .has_dot_2x16 = device->rad_info.has_accelerated_dot_product,
+      .has_find_msb_rev = true,
       .use_scoped_barrier = true,
 #ifdef LLVM_AVAILABLE
       .has_fmulz = !device->use_llvm || LLVM_VERSION_MAJOR >= 12,
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index a460faad950..908cbe78f80 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3617,6 +3617,9 @@ typedef struct nir_shader_compiler_options {
    /** Backend supports fmulz (and ffmaz if lower_ffma32=false) */
    bool has_fmulz;
 
+   /** Backend supports 32bit ufind_msb_rev and ifind_msb_rev. */
+   bool has_find_msb_rev;
+
    /**
     * Is this the Intel vec4 backend?
     *
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index b8209f732d8..cc55b866657 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -1686,6 +1686,35 @@ optimizations.extend([
    (('bcsel', ('ine', a, -1), ('ifind_msb', a), -1), ('ifind_msb', a)),
    (('bcsel', ('ine', a, -1), ('ifind_msb_rev', a), -1), ('ifind_msb_rev', a)),
 
+   (('bcsel', ('ine', ('ifind_msb', 'a@32'), -1), ('iadd', 31, ('ineg', 
('ifind_msb', a))), -1), ('ifind_msb_rev', a), 'options->has_find_msb_rev'),
+   (('bcsel', ('ine', ('ufind_msb', 'a@32'), -1), ('iadd', 31, ('ineg', 
('ufind_msb', a))), -1), ('ufind_msb_rev', a), 'options->has_find_msb_rev'),
+   (('bcsel', ('ieq', ('ifind_msb', 'a@32'), -1), -1, ('iadd', 31, ('ineg', 
('ifind_msb', a)))), ('ifind_msb_rev', a), 'options->has_find_msb_rev'),
+   (('bcsel', ('ieq', ('ufind_msb', 'a@32'), -1), -1, ('iadd', 31, ('ineg', 
('ufind_msb', a)))), ('ufind_msb_rev', a), 'options->has_find_msb_rev'),
+   (('bcsel', ('ine', ('ifind_msb', 'a@32'), -1), ('iadd', 31, ('ineg', 
('ifind_msb', a))), ('ifind_msb', a)), ('ifind_msb_rev', a), 
'options->has_find_msb_rev'),
+   (('bcsel', ('ine', ('ufind_msb', 'a@32'), -1), ('iadd', 31, ('ineg', 
('ufind_msb', a))), ('ufind_msb', a)), ('ufind_msb_rev', a), 
'options->has_find_msb_rev'),
+   (('bcsel', ('ieq', ('ifind_msb', 'a@32'), -1), ('ifind_msb', a), ('iadd', 
31, ('ineg', ('ifind_msb', a)))), ('ifind_msb_rev', a), 
'options->has_find_msb_rev'),
+   (('bcsel', ('ieq', ('ufind_msb', 'a@32'), -1), ('ufind_msb', a), ('iadd', 
31, ('ineg', ('ufind_msb', a)))), ('ufind_msb_rev', a), 
'options->has_find_msb_rev'),
+   (('bcsel', ('ine', 'a@32', 0), ('iadd', 31, ('ineg', ('ufind_msb', a))), 
-1), ('ufind_msb_rev', a), 'options->has_find_msb_rev'),
+   (('bcsel', ('ieq', 'a@32', 0), -1, ('iadd', 31, ('ineg', ('ufind_msb', 
a)))), ('ufind_msb_rev', a), 'options->has_find_msb_rev'),
+   (('bcsel', ('ine', 'a@32', 0), ('iadd', 31, ('ineg', ('ufind_msb', a))), 
('ufind_msb', a)), ('ufind_msb_rev', a), 'options->has_find_msb_rev'),
+   (('bcsel', ('ieq', 'a@32', 0), ('ufind_msb', a), ('iadd', 31, ('ineg', 
('ufind_msb', a)))), ('ufind_msb_rev', a), 'options->has_find_msb_rev'),
+
+   (('bcsel', ('ine', ('ifind_msb_rev', 'a@32'), -1), ('iadd', 31, ('ineg', 
('ifind_msb_rev', a))), -1), ('ifind_msb', a), 
'!options->lower_find_msb_to_reverse'),
+   (('bcsel', ('ine', ('ufind_msb_rev', 'a@32'), -1), ('iadd', 31, ('ineg', 
('ufind_msb_rev', a))), -1), ('ufind_msb', a), 
'!options->lower_find_msb_to_reverse'),
+   (('bcsel', ('ieq', ('ifind_msb_rev', 'a@32'), -1), -1, ('iadd', 31, 
('ineg', ('ifind_msb_rev', a)))), ('ifind_msb', a), 
'!options->lower_find_msb_to_reverse'),
+   (('bcsel', ('ieq', ('ufind_msb_rev', 'a@32'), -1), -1, ('iadd', 31, 
('ineg', ('ufind_msb_rev', a)))), ('ufind_msb', a), 
'!options->lower_find_msb_to_reverse'),
+   (('bcsel', ('ine', ('ifind_msb_rev', 'a@32'), -1), ('iadd', 31, ('ineg', 
('ifind_msb_rev', a))), ('ifind_msb_rev', a)), ('ifind_msb', a), 
'!options->lower_find_msb_to_reverse'),
+   (('bcsel', ('ine', ('ufind_msb_rev', 'a@32'), -1), ('iadd', 31, ('ineg', 
('ufind_msb_rev', a))), ('ufind_msb_rev', a)), ('ufind_msb', a), 
'!options->lower_find_msb_to_reverse'),
+   (('bcsel', ('ieq', ('ifind_msb_rev', 'a@32'), -1), ('ifind_msb_rev', a), 
('iadd', 31, ('ineg', ('ifind_msb_rev', a)))), ('ifind_msb', a), 
'!options->lower_find_msb_to_reverse'),
+   (('bcsel', ('ieq', ('ufind_msb_rev', 'a@32'), -1), ('ufind_msb_rev', a), 
('iadd', 31, ('ineg', ('ufind_msb_rev', a)))), ('ufind_msb', a), 
'!options->lower_find_msb_to_reverse'),
+   (('bcsel', ('ine', 'a@32', 0), ('iadd', 31, ('ineg', ('ufind_msb_rev', 
a))), -1), ('ufind_msb', a), '!options->lower_find_msb_to_reverse'),
+   (('bcsel', ('ieq', 'a@32', 0), -1, ('iadd', 31, ('ineg', ('ufind_msb_rev', 
a)))), ('ufind_msb', a), '!options->lower_find_msb_to_reverse'),
+   (('bcsel', ('ine', 'a@32', 0), ('iadd', 31, ('ineg', ('ufind_msb_rev', 
a))), ('ufind_msb_rev', a)), ('ufind_msb', a), 
'!options->lower_find_msb_to_reverse'),
+   (('bcsel', ('ieq', 'a@32', 0), ('ufind_msb_rev', a), ('iadd', 31, ('ineg', 
('ufind_msb_rev', a)))), ('ufind_msb', a), 
'!options->lower_find_msb_to_reverse'),
+
+   (('find_lsb', ('bitfield_reverse', a)), ('ufind_msb_rev', a), 
'options->has_find_msb_rev'),
+   (('ufind_msb_rev', ('bitfield_reverse', a)), ('find_lsb', a), 
'!options->lower_find_lsb'),
+
    (('~fmul', ('bcsel(is_used_once)', c, -1.0, 1.0), b), ('bcsel', c, ('fneg', 
b), b)),
    (('~fmul', ('bcsel(is_used_once)', c, 1.0, -1.0), b), ('bcsel', c, b, 
('fneg', b))),
    (('~fmulz', ('bcsel(is_used_once)', c, -1.0, 1.0), b), ('bcsel', c, 
('fneg', b), b)),

Reply via email to