Module: Mesa
Branch: main
Commit: 468ee8b80c7ffc03017d031df10875219430098e
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=468ee8b80c7ffc03017d031df10875219430098e

Author: Rhys Perry <[email protected]>
Date:   Fri Dec  1 16:20:38 2023 +0000

aco: implement 16-bit fsat on GFX8

GFX8 doesn't have v_med3_f16.

Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Georg Lehmann <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26445>

---

 src/amd/compiler/aco_instruction_selection.cpp |  6 +++++-
 src/amd/compiler/aco_optimizer.cpp             | 13 +++++++++++--
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index e18b205f222..fe11ad16a23 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -2631,9 +2631,13 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
          break;
       }
       Temp src = get_alu_src(ctx, instr->src[0]);
-      if (dst.regClass() == v2b) {
+      if (dst.regClass() == v2b && ctx->program->gfx_level >= GFX9) {
          bld.vop3(aco_opcode::v_med3_f16, Definition(dst), Operand::c16(0u), Operand::c16(0x3c00),
                   src);
+      } else if (dst.regClass() == v2b) {
+         bld.vop2_e64(aco_opcode::v_mul_f16, Definition(dst), Operand::c16(0x3c00), src)
+            ->valu()
+            .clamp = true;
       } else if (dst.regClass() == v1) {
          bld.vop3(aco_opcode::v_med3_f32, Definition(dst), Operand::zero(),
                   Operand::c32(0x3f800000u), src);
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index 9fdbffc7994..cd8e8bf787e 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -1890,13 +1890,19 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
                bool neg1 = instr->operands[!i].constantEquals(fp16 ? 0xbc00 : 0xbf800000u);
 
                VALU_instruction* vop3 = instr->isVOP3() ? &instr->valu() : NULL;
-               if (vop3 && (vop3->abs[!i] || vop3->neg[!i] || vop3->clamp || vop3->omod))
+               if (vop3 && (vop3->abs[!i] || vop3->neg[!i] || vop3->omod))
                   continue;
 
                bool abs = vop3 && vop3->abs[i];
                bool neg = neg1 ^ (vop3 && vop3->neg[i]);
-
                Temp other = instr->operands[i].getTemp();
+
+               if (vop3 && vop3->clamp) {
+                  if (!abs && !neg && other.type() == RegType::vgpr)
+                     ctx.info[other.id()].set_clamp(instr.get());
+                  continue;
+               }
+
                if (abs && neg && other.type() == RegType::vgpr)
                   ctx.info[instr->definitions[0].tempId()].set_neg_abs(other);
                else if (abs && !neg && other.type() == RegType::vgpr)
@@ -4562,6 +4568,9 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
       ctx.mad_infos.emplace_back(nullptr, 0);
       ctx.info[instr->definitions[0].tempId()].set_mad(ctx.mad_infos.size() - 1);
    } else if (instr->opcode == aco_opcode::v_med3_f32 || instr->opcode == aco_opcode::v_med3_f16) {
+      /* Optimize v_med3 to v_add so that it can be dual issued on GFX11. We start with v_med3 in
+       * case omod can be applied.
+       */
       unsigned idx;
       if (detect_clamp(instr.get(), &idx)) {
          instr->format = asVOP3(Format::VOP2);

Reply via email to