This patch improves 64-bit multiplication for AMD GCN. Patterns for
unsigned and signed 32x32->64-bit multiplication have been added, and
64x64->64-bit multiplication is now open-coded rather than calling a
library function. That may be a win for code size as well as speed,
since the function calling sequence isn't particularly concise for GCN.
The <su>mulsi3_highpart pattern has also been extended for GCN5+, since
that ISA version supports high-part result multiply instructions with
SGPR operands.
Note that libgcc's DImode multiply implementation is lost if libgcc is
built for DImode/TImode rather than SImode/DImode, which is a change we
make in a later patch in this series.
I can probably self-approve this, but I'll give Andrew Stubbs a chance
to comment.
Thanks,
Julian
2021-06-18 Julian Brown <jul...@codesourcery.com>
gcc/
* config/gcn/gcn.md (<su>mulsi3_highpart): Add SGPR alternatives for
GCN5+.
(<su>mulsidi3, muldi3): Add expanders.
---
gcc/config/gcn/gcn.md | 55 ++++++++++++++++++++++++++++++++++++++-----
1 file changed, 49 insertions(+), 6 deletions(-)
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index b5f895a93e2..70655ca4b8b 100644
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -1392,19 +1392,62 @@
(define_code_attr e [(sign_extend "e") (zero_extend "")])
(define_insn "<su>mulsi3_highpart"
- [(set (match_operand:SI 0 "register_operand" "= v")
+ [(set (match_operand:SI 0 "register_operand" "=Sg, Sg, v") ; alternatives 0-1 are the new Sg forms; alternative 2 is the original v form
(truncate:SI
(lshiftrt:DI
(mult:DI
(any_extend:DI
- (match_operand:SI 1 "register_operand" "% v"))
+ (match_operand:SI 1 "register_operand" "%SgA,SgA, v")) ; '%' keeps operands 1 and 2 commutative, as before
(any_extend:DI
- (match_operand:SI 2 "register_operand" "vSv")))
+ (match_operand:SI 2 "register_operand" "SgA, B,vSv"))) ; NOTE(review): A and B appear to be immediate constraints but the predicate is register_operand -- confirm these alternatives can actually match
(const_int 32))))]
""
- "v_mul_hi<sgnsuffix>0\t%0, %2, %1"
- [(set_attr "type" "vop3a")
- (set_attr "length" "8")])
+ "@
+ s_mul_hi<sgnsuffix>0\t%0, %1, %2
+ s_mul_hi<sgnsuffix>0\t%0, %1, %2
+ v_mul_hi<sgnsuffix>0\t%0, %2, %1"
+ [(set_attr "type" "sop2,sop2,vop3a") ; one attribute value per alternative, in constraint order
+ (set_attr "length" "4,8,8") ; only alternative 0 is given length 4; the other two are given 8
+ (set_attr "gcn_version" "gcn5,gcn5,*")]) ; both s_mul_hi alternatives are tagged gcn5; the v_mul_hi alternative keeps the wildcard
+
+(define_expand "<su>mulsidi3" ; widening SImode x SImode -> DImode multiply, assembled from a low-part and a high-part multiply
+ [(set (match_operand:DI 0 "register_operand" "")
+ (mult:DI
+ (any_extend:DI (match_operand:SI 1 "register_operand" ""))
+ (any_extend:DI (match_operand:SI 2 "register_operand" ""))))]
+ ""
+ {
+ rtx dst = gen_reg_rtx (DImode); /* Fresh DImode pseudo in which the product is assembled.  */
+ rtx dstlo = gen_lowpart (SImode, dst); /* SImode view of the low half of dst.  */
+ rtx dsthi = gen_highpart_mode (SImode, DImode, dst); /* SImode view of the high half of dst.  */
+ emit_insn (gen_mulsi3 (dstlo, operands[1], operands[2])); /* Low half: plain SImode multiply of the two inputs.  */
+ emit_insn (gen_<su>mulsi3_highpart (dsthi, operands[1], operands[2])); /* High half: signed or unsigned variant chosen by <su>.  */
+ emit_move_insn (operands[0], dst); /* Copy the assembled result into the real destination.  */
+ DONE; /* Everything was emitted by hand above.  */
+ })
+
+(define_expand "muldi3" ; open-coded DImode x DImode -> DImode multiply built from SImode partial products
+ [(set (match_operand:DI 0 "register_operand" "")
+ (mult:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "register_operand" "")))]
+ ""
+ {
+ rtx tmp0 = gen_reg_rtx (SImode); /* Scratch for op1lo * op2hi.  */
+ rtx tmp1 = gen_reg_rtx (SImode); /* Scratch for op1hi * op2lo.  */
+ rtx dst = gen_reg_rtx (DImode); /* Fresh DImode pseudo in which the product is accumulated.  */
+ rtx dsthi = gen_highpart_mode (SImode, DImode, dst); /* SImode view of the high half of dst.  */
+ rtx op1lo = gen_lowpart (SImode, operands[1]); /* Low and high SImode halves of both inputs.  */
+ rtx op1hi = gen_highpart_mode (SImode, DImode, operands[1]);
+ rtx op2lo = gen_lowpart (SImode, operands[2]);
+ rtx op2hi = gen_highpart_mode (SImode, DImode, operands[2]);
+ emit_insn (gen_umulsidi3 (dst, op1lo, op2lo)); /* dst = full 64-bit unsigned product of the two low halves.  */
+ emit_insn (gen_mulsi3 (tmp0, op1lo, op2hi)); /* First cross term, SImode result only.  */
+ emit_insn (gen_addsi3 (dsthi, dsthi, tmp0)); /* Accumulate it into the high half of dst.  */
+ emit_insn (gen_mulsi3 (tmp1, op1hi, op2lo)); /* Second cross term, SImode result only.  */
+ emit_insn (gen_addsi3 (dsthi, dsthi, tmp1)); /* Accumulate it too; op1hi * op2hi is not computed since it only affects bits above 63.  */
+ emit_move_insn (operands[0], dst); /* Copy the assembled result into the real destination.  */
+ DONE; /* Everything was emitted by hand above.  */
+ })
(define_insn "<u>mulhisi3"
[(set (match_operand:SI 0 "register_operand" "=v")