Module: Mesa Branch: main Commit: aac1e3f5950fd701b872ae907b2aa7165d772adc URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=aac1e3f5950fd701b872ae907b2aa7165d772adc
Author: Faith Ekstrand <[email protected]> Date: Thu Dec 7 10:10:34 2023 -0600 nir: Add a new has_fmulz_no_denorms flag Reviewed-by: Alyssa Rosenzweig <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26569> --- src/compiler/nir/nir.h | 6 ++++++ src/compiler/nir/nir_opt_algebraic.py | 14 +++++++++----- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 3e66e822c84..497a5717855 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -3900,6 +3900,12 @@ typedef struct nir_shader_compiler_options { /** Backend supports fmulz (and ffmaz if lower_ffma32=false) */ bool has_fmulz; + /** + * Backend supports fmulz (and ffmaz if lower_ffma32=false) but only if + * FLOAT_CONTROLS_DENORM_PRESERVE_FP32 is not set + */ + bool has_fmulz_no_denorms; + /** Backend supports 32bit ufind_msb_rev and ifind_msb_rev. */ bool has_find_msb_rev; diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 6cb19b53343..89d51b17072 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -46,6 +46,10 @@ signed_zero_nan_preserve_32 = ('(nir_is_float_control_signed_zero_preserve(info- signed_zero_inf_nan_preserve_16 = 'nir_is_float_control_signed_zero_inf_nan_preserve(info->float_controls_execution_mode, 16)' signed_zero_inf_nan_preserve_32 = 'nir_is_float_control_signed_zero_inf_nan_preserve(info->float_controls_execution_mode, 32)' +has_fmulz = '(options->has_fmulz || \ + (options->has_fmulz_no_denorms && \ + !nir_is_denorm_preserve(info->float_controls_execution_mode, 32)))' + ignore_exact = nir_algebraic.ignore_exact # Written in the form (<search>, <replace>) where <search> is an expression @@ -274,20 +278,20 @@ optimizations = [ # Optimize open-coded fmulz. # (b==0.0 ? 0.0 : a) * (a==0.0 ? 0.0 : b) -> fmulz(a, b) (('fmul@32', ('bcsel', ignore_exact('feq', b, 0.0), 0.0, a), ('bcsel', ignore_exact('feq', a, 0.0), 0.0, b)), - ('fmulz', a, b), 'options->has_fmulz && !'+signed_zero_preserve_32), + ('fmulz', a, b), has_fmulz+' && !'+signed_zero_preserve_32), (('fmul@32', a, ('bcsel', ignore_exact('feq', a, 0.0), 0.0, '#b(is_not_const_zero)')), - ('fmulz', a, b), 'options->has_fmulz && !'+signed_zero_preserve_32), + ('fmulz', a, b), has_fmulz+' && !'+signed_zero_preserve_32), # ffma(b==0.0 ? 0.0 : a, a==0.0 ? 0.0 : b, c) -> ffmaz(a, b, c) (('ffma@32', ('bcsel', ignore_exact('feq', b, 0.0), 0.0, a), ('bcsel', ignore_exact('feq', a, 0.0), 0.0, b), c), - ('ffmaz', a, b, c), 'options->has_fmulz && !'+signed_zero_preserve_32), + ('ffmaz', a, b, c), has_fmulz+' && !'+signed_zero_preserve_32), (('ffma@32', a, ('bcsel', ignore_exact('feq', a, 0.0), 0.0, '#b(is_not_const_zero)'), c), - ('ffmaz', a, b, c), 'options->has_fmulz && !'+signed_zero_preserve_32), + ('ffmaz', a, b, c), has_fmulz+' && !'+signed_zero_preserve_32), # b == 0.0 ? 1.0 : fexp2(fmul(a, b)) -> fexp2(fmulz(a, b)) (('bcsel', ignore_exact('feq', b, 0.0), 1.0, ('fexp2', ('fmul@32', a, b))), ('fexp2', ('fmulz', a, b)), - 'options->has_fmulz && !'+signed_zero_inf_nan_preserve_32), + has_fmulz+' && !'+signed_zero_inf_nan_preserve_32), ] # Shorthand for the expansion of just the dot product part of the [iu]dp4a
