Module: Mesa
Branch: main
Commit: 21304b772c285a9c90900ac9b2ef25202b7aa1aa
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=21304b772c285a9c90900ac9b2ef25202b7aa1aa

Author: Rhys Perry <[email protected]>
Date:   Mon Jan 17 17:54:47 2022 +0000

aco: apply clamp to v_fma_mix

fossil-db (Sienna Cichlid):
Totals from 2536 (1.88% of 134913) affected shaders:
CodeSize: 17314568 -> 17282960 (-0.18%)
Instrs: 3191438 -> 3187487 (-0.12%)
Latency: 59465090 -> 59407885 (-0.10%)
InvThroughput: 10271466 -> 10260512 (-0.11%)

fossil-db (Navi):
Totals from 2512 (1.86% of 134913) affected shaders:
CodeSize: 17194700 -> 17173396 (-0.12%)
Instrs: 3215093 -> 3212430 (-0.08%)
Latency: 60174315 -> 60142593 (-0.05%)
InvThroughput: 9491103 -> 9483979 (-0.08%)

fossil-db (Vega):
Totals from 2512 (1.86% of 135048) affected shaders:
CodeSize: 17186776 -> 17165472 (-0.12%)
Instrs: 3311166 -> 3308503 (-0.08%)
Latency: 65737409 -> 65716096 (-0.03%)
InvThroughput: 21735857 -> 21719792 (-0.07%)

Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14769>

---

 src/amd/compiler/aco_optimizer.cpp | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index d99a97e7d9e..7109c2e4f7b 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -3017,11 +3017,13 @@ apply_omod_clamp(opt_ctx& ctx, aco_ptr<Instruction>& instr)
       return false;
 
    bool can_vop3 = can_use_VOP3(ctx, instr);
-   if (!instr->isSDWA() && !can_vop3)
+   bool is_mad_mix =
+      instr->opcode == aco_opcode::v_fma_mix_f32 || instr->opcode == aco_opcode::v_fma_mixlo_f16;
+   if (!instr->isSDWA() && !is_mad_mix && !can_vop3)
       return false;
 
-   /* omod flushes -0 to +0 and has no effect if denormals are enabled */
-   bool can_use_omod = (can_vop3 || ctx.program->chip_class >= GFX9); /* SDWA omod is GFX9+ */
+   /* omod flushes -0 to +0 and has no effect if denormals are enabled. SDWA omod is GFX9+. */
+   bool can_use_omod = (can_vop3 || ctx.program->chip_class >= GFX9) && !instr->isVOP3P();
    if (instr->definitions[0].bytes() == 4)
       can_use_omod =
          can_use_omod && ctx.fp_mode.denorm32 == 0 && !ctx.fp_mode.preserve_signed_zero_inf_nan32;
@@ -3048,6 +3050,9 @@ apply_omod_clamp(opt_ctx& ctx, aco_ptr<Instruction>& instr)
    if (instr->isSDWA()) {
       if (!apply_omod_clamp_helper(ctx, &instr->sdwa(), def_info))
          return false;
+   } else if (instr->isVOP3P()) {
+      assert(def_info.is_clamp());
+      instr->vop3p().clamp = true;
    } else {
       to_VOP3(ctx, instr);
       if (!apply_omod_clamp_helper(ctx, &instr->vop3(), def_info))

Reply via email to