Module: Mesa
Branch: master
Commit: 631e18d4275dc46cf47c969e85d8ec2d3d0262be
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=631e18d4275dc46cf47c969e85d8ec2d3d0262be

Author: Rhys Perry <[email protected]>
Date:   Fri Jun  5 17:36:29 2020 +0100

aco: create v_mad_u32_u24

fossil-db (Navi):
Totals from 849 (0.61% of 138791) affected shaders:
SGPRs: 38528 -> 38544 (+0.04%)
VGPRs: 39860 -> 39856 (-0.01%)
CodeSize: 2701880 -> 2702016 (+0.01%)
MaxWaves: 9148 -> 9150 (+0.02%)
Instrs: 509864 -> 509821 (-0.01%); split: -0.01%, +0.00%
Cycles: 3400124 -> 3399628 (-0.01%); split: -0.02%, +0.00%
VMEM: 262757 -> 262672 (-0.03%)
SMEM: 59710 -> 59704 (-0.01%)
Copies: 44461 -> 44466 (+0.01%)

fossil-db (Polaris):
Totals from 1487 (1.06% of 140385) affected shaders:
SGPRs: 54688 -> 55840 (+2.11%)
CodeSize: 2725608 -> 2725720 (+0.00%); split: -0.01%, +0.01%
Instrs: 521394 -> 517710 (-0.71%)
Cycles: 18474108 -> 18410964 (-0.34%)
VMEM: 436992 -> 431028 (-1.36%); split: +0.06%, -1.43%
SMEM: 124503 -> 122564 (-1.56%); split: +0.45%, -2.00%
VClause: 21972 -> 22015 (+0.20%); split: -0.12%, +0.31%
SClause: 14274 -> 14287 (+0.09%)
Copies: 44407 -> 44411 (+0.01%); split: -0.02%, +0.03%
PreSGPRs: 34318 -> 34321 (+0.01%); split: -0.00%, +0.01%

Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Samuel Pitoiset <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7639>

---

 src/amd/compiler/aco_optimizer.cpp        | 14 +++++++-------
 src/amd/compiler/tests/test_optimizer.cpp | 21 +++++++++++++++++++++
 2 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index 5d810b306bb..47066d4080f 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -1316,6 +1316,9 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
          ctx.info[instr->definitions[0].tempId()].set_usedef(instr.get());
       }
       break;
+   case aco_opcode::v_mul_u32_u24:
+      ctx.info[instr->definitions[0].tempId()].set_usedef(instr.get());
+      break;
    case aco_opcode::v_and_b32: { /* abs */
       if (!instr->usesModifiers() && instr->operands[1].isTemp() &&
           instr->operands[1].getTemp().type() == RegType::vgpr &&
@@ -2324,12 +2327,6 @@ bool combine_add_bcnt(opt_ctx& ctx, aco_ptr<Instruction>& instr)
    if (instr->usesModifiers())
       return false;
 
-   /* Do not combine if the carry-out is used. */
-   if ((instr->opcode == aco_opcode::v_add_co_u32 ||
-        instr->opcode == aco_opcode::v_add_co_u32_e64) &&
-       ctx.uses[instr->definitions[1].tempId()])
-      return false;
-
    for (unsigned i = 0; i < 2; i++) {
       Instruction *op_instr = follow_operand(ctx, instr->operands[i]);
       if (op_instr &&
@@ -2912,6 +2909,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
    } else if (instr->opcode == aco_opcode::v_add_u32) {
       if (combine_add_sub_b2i(ctx, instr, aco_opcode::v_addc_co_u32, 1 | 2)) ;
       else if (combine_add_bcnt(ctx, instr)) ;
+      else if (combine_three_valu_op(ctx, instr, aco_opcode::v_mul_u32_u24, aco_opcode::v_mad_u32_u24, "120", 1 | 2)) ;
       else if (ctx.program->chip_class >= GFX9 && !instr->usesModifiers()) {
          if (combine_three_valu_op(ctx, instr, aco_opcode::s_xor_b32, aco_opcode::v_xad_u32, "120", 1 | 2)) ;
          else if (combine_three_valu_op(ctx, instr, aco_opcode::v_xor_b32, aco_opcode::v_xad_u32, "120", 1 | 2)) ;
@@ -2924,8 +2922,10 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
       }
    } else if (instr->opcode == aco_opcode::v_add_co_u32 ||
               instr->opcode == aco_opcode::v_add_co_u32_e64) {
+      bool carry_out = ctx.uses[instr->definitions[1].tempId()] > 0;
       if (combine_add_sub_b2i(ctx, instr, aco_opcode::v_addc_co_u32, 1 | 2)) ;
-      else combine_add_bcnt(ctx, instr);
+      else if (!carry_out && combine_add_bcnt(ctx, instr)) ;
+      else if (!carry_out) combine_three_valu_op(ctx, instr, aco_opcode::v_mul_u32_u24, aco_opcode::v_mad_u32_u24, "120", 1 | 2);
    } else if (instr->opcode == aco_opcode::v_sub_u32 ||
               instr->opcode == aco_opcode::v_sub_co_u32 ||
               instr->opcode == aco_opcode::v_sub_co_u32_e64) {
diff --git a/src/amd/compiler/tests/test_optimizer.cpp b/src/amd/compiler/tests/test_optimizer.cpp
index 8d6805febb8..bf7b51bee69 100644
--- a/src/amd/compiler/tests/test_optimizer.cpp
+++ b/src/amd/compiler/tests/test_optimizer.cpp
@@ -723,3 +723,24 @@ BEGIN_TEST(optimize.minmax)
       finish_opt_test();
    }
 END_TEST
+
+BEGIN_TEST(optimize.mad_32_24)
+   for (unsigned i = GFX8; i <= GFX9; i++) {
+      //>> v1: %a, v1: %b, v1: %c, s2: %_:exec = p_startpgm
+      if (!setup_cs("v1 v1 v1", (chip_class)i))
+         continue;
+
+      //! v1: %res0 = v_mad_u32_u24 %b, %c, %a
+      //! p_unit_test 0, %res0
+      Temp mul = bld.vop2(aco_opcode::v_mul_u32_u24, bld.def(v1), inputs[1], inputs[2]);
+      writeout(0, bld.vadd32(bld.def(v1), inputs[0], mul));
+
+      //! v1: %res1_tmp = v_mul_u32_u24 %b, %c
+      //! v1: %_, s2: %res1 = v_add_co_u32 %a, %res1_tmp
+      //! p_unit_test 1, %res1
+      mul = bld.vop2(aco_opcode::v_mul_u32_u24, bld.def(v1), inputs[1], inputs[2]);
+      writeout(1, bld.vadd32(bld.def(v1), inputs[0], mul, true).def(1).getTemp());
+
+      finish_opt_test();
+   }
+END_TEST

_______________________________________________
mesa-commit mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to