This patch improves 64-bit multiplication for AMD GCN. It adds patterns for unsigned and signed 32x32->64-bit multiplication, and 64x64->64-bit multiplication is now open-coded instead of calling a library function (which may be a win for code size as well as speed, since the function-calling sequence isn't particularly concise for GCN).
The <su>mulsi3_highpart pattern has also been extended for GCN5+, since that ISA version supports high-part result multiply instructions with SGPR operands. The DImode multiply implementation is lost from libgcc if we build it for DImode/TImode rather than SImode/DImode, a change we make in a later patch in this series. I can probably self-approve this, but I'll give Andrew Stubbs a chance to comment. Thanks, Julian 2021-06-18 Julian Brown <jul...@codesourcery.com> gcc/ * config/gcn/gcn.md (<su>mulsi3_highpart): Add SGPR alternatives for GCN5+. (<su>mulsidi3, muldi3): Add expanders. --- gcc/config/gcn/gcn.md | 55 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 49 insertions(+), 6 deletions(-) diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md index b5f895a93e2..70655ca4b8b 100644 --- a/gcc/config/gcn/gcn.md +++ b/gcc/config/gcn/gcn.md @@ -1392,19 +1392,62 @@ (define_code_attr e [(sign_extend "e") (zero_extend "")]) (define_insn "<su>mulsi3_highpart" - [(set (match_operand:SI 0 "register_operand" "= v") + [(set (match_operand:SI 0 "register_operand" "=Sg, Sg, v") (truncate:SI (lshiftrt:DI (mult:DI (any_extend:DI - (match_operand:SI 1 "register_operand" "% v")) + (match_operand:SI 1 "register_operand" "%SgA,SgA, v")) (any_extend:DI - (match_operand:SI 2 "register_operand" "vSv"))) + (match_operand:SI 2 "register_operand" "SgA, B,vSv"))) (const_int 32))))] "" - "v_mul_hi<sgnsuffix>0\t%0, %2, %1" - [(set_attr "type" "vop3a") - (set_attr "length" "8")]) + "@ + s_mul_hi<sgnsuffix>0\t%0, %1, %2 + s_mul_hi<sgnsuffix>0\t%0, %1, %2 + v_mul_hi<sgnsuffix>0\t%0, %2, %1" + [(set_attr "type" "sop2,sop2,vop3a") + (set_attr "length" "4,8,8") + (set_attr "gcn_version" "gcn5,gcn5,*")]) + +(define_expand "<su>mulsidi3" + [(set (match_operand:DI 0 "register_operand" "") + (mult:DI + (any_extend:DI (match_operand:SI 1 "register_operand" "")) + (any_extend:DI (match_operand:SI 2 "register_operand" ""))))] + "" + { + rtx dst = gen_reg_rtx (DImode); + rtx dstlo = 
gen_lowpart (SImode, dst); + rtx dsthi = gen_highpart_mode (SImode, DImode, dst); + emit_insn (gen_mulsi3 (dstlo, operands[1], operands[2])); + emit_insn (gen_<su>mulsi3_highpart (dsthi, operands[1], operands[2])); + emit_move_insn (operands[0], dst); + DONE; + }) + +(define_expand "muldi3" + [(set (match_operand:DI 0 "register_operand" "") + (mult:DI (match_operand:DI 1 "register_operand" "") + (match_operand:DI 2 "register_operand" "")))] + "" + { + rtx tmp0 = gen_reg_rtx (SImode); + rtx tmp1 = gen_reg_rtx (SImode); + rtx dst = gen_reg_rtx (DImode); + rtx dsthi = gen_highpart_mode (SImode, DImode, dst); + rtx op1lo = gen_lowpart (SImode, operands[1]); + rtx op1hi = gen_highpart_mode (SImode, DImode, operands[1]); + rtx op2lo = gen_lowpart (SImode, operands[2]); + rtx op2hi = gen_highpart_mode (SImode, DImode, operands[2]); + emit_insn (gen_umulsidi3 (dst, op1lo, op2lo)); + emit_insn (gen_mulsi3 (tmp0, op1lo, op2hi)); + emit_insn (gen_addsi3 (dsthi, dsthi, tmp0)); + emit_insn (gen_mulsi3 (tmp1, op1hi, op2lo)); + emit_insn (gen_addsi3 (dsthi, dsthi, tmp1)); + emit_move_insn (operands[0], dst); + DONE; + }) (define_insn "<u>mulhisi3" [(set (match_operand:SI 0 "register_operand" "=v") -- 2.29.2