Module: Mesa
Branch: main
Commit: e12bee3cb7f757408a3f739e788561a56d09041f
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=e12bee3cb7f757408a3f739e788561a56d09041f

Author: Rhys Perry <[email protected]>
Date:   Thu Jan 27 14:00:38 2022 +0000

aco: improve support for v_fma_mix

Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14769>

---

 src/amd/compiler/aco_ir.cpp       |  5 +++++
 src/amd/compiler/aco_ir.h         |  3 ++-
 src/amd/compiler/aco_print_ir.cpp | 19 +++++++++++++++++--
 src/amd/compiler/aco_validate.cpp |  6 ++++++
 4 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp
index cd705bc1a18..23aa602ebac 100644
--- a/src/amd/compiler/aco_ir.cpp
+++ b/src/amd/compiler/aco_ir.cpp
@@ -150,6 +150,11 @@ init_program(Program* program, Stage stage, const struct radv_shader_info* info,
       program->dev.has_fast_fma32 = true;
    program->dev.has_mac_legacy32 = program->chip_class <= GFX7 || program->chip_class >= GFX10;
 
+   program->dev.fused_mad_mix = program->chip_class >= GFX10;
+   if (program->family == CHIP_VEGA12 || program->family == CHIP_VEGA20 ||
+       program->family == CHIP_ARCTURUS || program->family == CHIP_ALDEBARAN)
+      program->dev.fused_mad_mix = true;
+
    program->wgp_mode = wgp_mode;
 
    program->progress = CompilationProgress::after_isel;
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index fa4f885c873..8ec61795db3 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -1405,7 +1405,7 @@ static_assert(sizeof(VOP3_instruction) == sizeof(Instruction) + 8, "Unexpected p
 
 struct VOP3P_instruction : public Instruction {
    bool neg_lo[3];
-   bool neg_hi[3];
+   bool neg_hi[3]; /* abs modifier, for v_mad_mix/v_fma_mix */
    uint8_t opsel_lo : 3;
    uint8_t opsel_hi : 3;
    bool clamp : 1;
@@ -2047,6 +2047,7 @@ struct DeviceInfo {
    unsigned simd_per_cu;
    bool has_fast_fma32 = false;
    bool has_mac_legacy32 = false;
+   bool fused_mad_mix = false;
    bool xnack_enabled = false;
    bool sram_ecc_enabled = false;
 };
diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp
index f650de37791..2b92eff8101 100644
--- a/src/amd/compiler/aco_print_ir.cpp
+++ b/src/amd/compiler/aco_print_ir.cpp
@@ -664,11 +664,16 @@ aco_print_instr(const Instruction* instr, FILE* output, unsigned flags)
       bool* const abs = (bool*)alloca(num_operands * sizeof(bool));
       bool* const neg = (bool*)alloca(num_operands * sizeof(bool));
       bool* const opsel = (bool*)alloca(num_operands * sizeof(bool));
+      bool* const f2f32 = (bool*)alloca(num_operands * sizeof(bool));
       for (unsigned i = 0; i < num_operands; ++i) {
          abs[i] = false;
          neg[i] = false;
          opsel[i] = false;
+         f2f32[i] = false;
       }
+      bool is_mad_mix = instr->opcode == aco_opcode::v_fma_mix_f32 ||
+                        instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
+                        instr->opcode == aco_opcode::v_fma_mixhi_f16;
       if (instr->isVOP3()) {
          const VOP3_instruction& vop3 = instr->vop3();
          for (unsigned i = 0; i < MIN2(num_operands, 3); ++i) {
@@ -690,6 +695,14 @@ aco_print_instr(const Instruction* instr, FILE* output, unsigned flags)
             neg[i] = sdwa.neg[i];
             opsel[i] = false;
          }
+      } else if (instr->isVOP3P() && is_mad_mix) {
+         const VOP3P_instruction& vop3p = instr->vop3p();
+         for (unsigned i = 0; i < MIN2(num_operands, 3); ++i) {
+            abs[i] = vop3p.neg_hi[i];
+            neg[i] = vop3p.neg_lo[i];
+            f2f32[i] = vop3p.opsel_hi & (1 << i);
+            opsel[i] = f2f32[i] && (vop3p.opsel_lo & (1 << i));
+         }
       }
       for (unsigned i = 0; i < num_operands; ++i) {
          if (i)
@@ -703,13 +716,15 @@ aco_print_instr(const Instruction* instr, FILE* output, unsigned flags)
             fprintf(output, "|");
          if (opsel[i])
             fprintf(output, "hi(");
+         else if (f2f32[i])
+            fprintf(output, "lo(");
          aco_print_operand(&instr->operands[i], output, flags);
-         if (opsel[i])
+         if (f2f32[i] || opsel[i])
             fprintf(output, ")");
          if (abs[i])
             fprintf(output, "|");
 
-         if (instr->isVOP3P()) {
+         if (instr->isVOP3P() && !is_mad_mix) {
             const VOP3P_instruction& vop3 = instr->vop3p();
             if ((vop3.opsel_lo & (1 << i)) || !(vop3.opsel_hi & (1 << i))) {
                fprintf(output, ".%c%c", vop3.opsel_lo & (1 << i) ? 'y' : 'x',
diff --git a/src/amd/compiler/aco_validate.cpp b/src/amd/compiler/aco_validate.cpp
index 13e1b55f602..198aa072c0b 100644
--- a/src/amd/compiler/aco_validate.cpp
+++ b/src/amd/compiler/aco_validate.cpp
@@ -236,6 +236,12 @@ validate_ir(Program* program)
             if (instr->definitions[0].regClass().is_subdword() && !instr->definitions[0].isFixed())
                check((vop3.opsel & (1 << 3)) == 0, "Unexpected opsel for sub-dword definition",
                      instr.get());
+         } else if (instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
+                    instr->opcode == aco_opcode::v_fma_mixhi_f16 ||
+                    instr->opcode == aco_opcode::v_fma_mix_f32) {
+            check(instr->definitions[0].regClass() ==
+                     (instr->opcode == aco_opcode::v_fma_mix_f32 ? v1 : v2b),
+                  "v_fma_mix_f32/v_fma_mix_f16 must have v1/v2b definition", instr.get());
          } else if (instr->isVOP3P()) {
             VOP3P_instruction& vop3p = instr->vop3p();
             for (unsigned i = 0; i < instr->operands.size(); i++) {

Reply via email to