https://github.com/mshockwave updated https://github.com/llvm/llvm-project/pull/67863
>From 08f77d6a53dadd4c136b92fcb60700fd7389eeb3 Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu <min....@sifive.com> Date: Fri, 29 Sep 2023 15:17:43 -0700 Subject: [PATCH 1/6] [RISCV][GISel] Add ISel supports for SHXADD from Zba extension This patch consists of porting the (SDISel) patterns of SHXADD instructions. Note that `non_imm12`, a predicate that was implemented with `PatLeaf`, is now turned into a ComplexPattern to facilitate reuse of the patterns that use it between SDISel and GISel. --- .../RISCV/GISel/RISCVInstructionSelector.cpp | 130 +++++++++++++++ llvm/lib/Target/RISCV/RISCVGISel.td | 10 ++ llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 9 ++ llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h | 2 + llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 51 +++--- .../instruction-select/zba-rv32.mir | 152 ++++++++++++++++++ .../instruction-select/zba-rv64.mir | 152 ++++++++++++++++++ 7 files changed, 479 insertions(+), 27 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/zba-rv32.mir create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/zba-rv64.mir diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp index 4f97a0d84f686f9..3a98e84546f376f 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp @@ -17,6 +17,7 @@ #include "RISCVTargetMachine.h" #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/IR/IntrinsicsRISCV.h" #include "llvm/Support/Debug.h" @@ -55,6 +56,14 @@ class RISCVInstructionSelector : public InstructionSelector { ComplexRendererFns selectShiftMask(MachineOperand &Root) const; + ComplexRendererFns selectNonImm12(MachineOperand &Root) const; + + ComplexRendererFns 
selectSHXADDOp(MachineOperand &Root, unsigned ShAmt) const; + template <unsigned ShAmt> + ComplexRendererFns selectSHXADDOp(MachineOperand &Root) const { + return selectSHXADDOp(Root, ShAmt); + } + // Custom renderers for tablegen void renderNegImm(MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const; @@ -105,6 +114,127 @@ RISCVInstructionSelector::selectShiftMask(MachineOperand &Root) const { return {{[=](MachineInstrBuilder &MIB) { MIB.add(Root); }}}; } +// This complex pattern actually serves as a perdicate that is effectively +// `!isInt<12>(Imm)`. +InstructionSelector::ComplexRendererFns +RISCVInstructionSelector::selectNonImm12(MachineOperand &Root) const { + MachineFunction &MF = *Root.getParent()->getParent()->getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + if (Root.isReg() && Root.getReg()) + if (auto Val = getIConstantVRegValWithLookThrough(Root.getReg(), MRI)) { + // We do NOT want immediates that fit in 12 bits. + if (isInt<12>(Val->Value.getSExtValue())) + return std::nullopt; + } + + return {{[=](MachineInstrBuilder &MIB) { MIB.add(Root); }}}; +} + +InstructionSelector::ComplexRendererFns +RISCVInstructionSelector::selectSHXADDOp(MachineOperand &Root, + unsigned ShAmt) const { + using namespace llvm::MIPatternMatch; + MachineFunction &MF = *Root.getParent()->getParent()->getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + if (!Root.isReg()) + return std::nullopt; + Register RootReg = Root.getReg(); + + const unsigned XLen = STI.getXLen(); + APInt Mask, C2; + Register RegY; + std::optional<bool> LeftShift; + // (and (shl y, c2), mask) + if (mi_match(RootReg, MRI, + m_GAnd(m_GShl(m_Reg(RegY), m_ICst(C2)), m_ICst(Mask)))) + LeftShift = true; + // (and (lshr y, c2), mask) + else if (mi_match(RootReg, MRI, + m_GAnd(m_GLShr(m_Reg(RegY), m_ICst(C2)), m_ICst(Mask)))) + LeftShift = false; + + if (LeftShift.has_value()) { + if (*LeftShift) + Mask &= maskTrailingZeros<uint64_t>(C2.getLimitedValue()); + else + Mask &= 
maskTrailingOnes<uint64_t>(XLen - C2.getLimitedValue()); + + if (Mask.isShiftedMask()) { + unsigned Leading = XLen - Mask.getActiveBits(); + unsigned Trailing = Mask.countr_zero(); + // Given (and (shl y, c2), mask) in which mask has no leading zeros and c3 + // trailing zeros. We can use an SRLI by c3 - c2 followed by a SHXADD. + if (*LeftShift && Leading == 0 && C2.ult(Trailing) && Trailing == ShAmt) { + Register DstReg = + MRI.createGenericVirtualRegister(MRI.getType(RootReg)); + return {{[=](MachineInstrBuilder &MIB) { + MachineIRBuilder(*MIB.getInstr()) + .buildInstr(RISCV::SRLI, {DstReg}, {RegY}) + .addImm(Trailing - C2.getLimitedValue()); + MIB.addReg(DstReg); + }}}; + } + + // Given (and (lshr y, c2), mask) in which mask has c2 leading zeros and c3 + // trailing zeros. We can use an SRLI by c2 + c3 followed by a SHXADD. + if (!*LeftShift && Leading == C2 && Trailing == ShAmt) { + Register DstReg = + MRI.createGenericVirtualRegister(MRI.getType(RootReg)); + return {{[=](MachineInstrBuilder &MIB) { + MachineIRBuilder(*MIB.getInstr()) + .buildInstr(RISCV::SRLI, {DstReg}, {RegY}) + .addImm(Leading + Trailing); + MIB.addReg(DstReg); + }}}; + } + } + } + + LeftShift.reset(); + + // (shl (and y, mask), c2) + if (mi_match(RootReg, MRI, + m_GShl(m_OneNonDBGUse(m_GAnd(m_Reg(RegY), m_ICst(Mask))), + m_ICst(C2)))) + LeftShift = true; + // (lshr (and y, mask), c2) + else if (mi_match(RootReg, MRI, + m_GLShr(m_OneNonDBGUse(m_GAnd(m_Reg(RegY), m_ICst(Mask))), + m_ICst(C2)))) + LeftShift = false; + + if (LeftShift.has_value()) + if (Mask.isShiftedMask()) { + unsigned Leading = XLen - Mask.getActiveBits(); + unsigned Trailing = Mask.countr_zero(); + + // Given (shl (and y, mask), c2) in which mask has 32 leading zeros and + // c3 trailing zeros. If c1 + c3 == ShAmt, we can emit SRLIW + SHXADD. 
+ bool Cond = *LeftShift && Leading == 32 && Trailing > 0 && + (Trailing + C2.getLimitedValue()) == ShAmt; + if (!Cond) + // Given (lshr (and y, mask), c2) in which mask has 32 leading zeros and + // c3 trailing zeros. If c3 - c1 == ShAmt, we can emit SRLIW + SHXADD. + Cond = !*LeftShift && Leading == 32 && C2.ult(Trailing) && + (Trailing - C2.getLimitedValue()) == ShAmt; + + if (Cond) { + Register DstReg = + MRI.createGenericVirtualRegister(MRI.getType(RootReg)); + return {{[=](MachineInstrBuilder &MIB) { + MachineIRBuilder(*MIB.getInstr()) + .buildInstr(RISCV::SRLIW, {DstReg}, {RegY}) + .addImm(Trailing); + MIB.addReg(DstReg); + }}}; + } + } + + return std::nullopt; +} + // Tablegen doesn't allow us to write SRLIW/SRAIW/SLLIW patterns because the // immediate Operand has type XLenVT. GlobalISel wants it to be i32. bool RISCVInstructionSelector::earlySelectShift( diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td index 8059b517f26ba3c..2d6a293c2cca148 100644 --- a/llvm/lib/Target/RISCV/RISCVGISel.td +++ b/llvm/lib/Target/RISCV/RISCVGISel.td @@ -31,6 +31,16 @@ def ShiftMaskGI : GIComplexOperandMatcher<s32, "selectShiftMask">, GIComplexPatternEquiv<shiftMaskXLen>; +def gi_non_imm12 : GIComplexOperandMatcher<s32, "selectNonImm12">, + GIComplexPatternEquiv<non_imm12>; + +def gi_sh1add_op : GIComplexOperandMatcher<s32, "selectSHXADDOp<1>">, + GIComplexPatternEquiv<sh1add_op>; +def gi_sh2add_op : GIComplexOperandMatcher<s32, "selectSHXADDOp<2>">, + GIComplexPatternEquiv<sh2add_op>; +def gi_sh3add_op : GIComplexOperandMatcher<s32, "selectSHXADDOp<3>">, + GIComplexPatternEquiv<sh3add_op>; + // FIXME: Canonicalize (sub X, C) -> (add X, -C) earlier. 
def : Pat<(XLenVT (sub GPR:$rs1, simm12Plus1:$imm)), (ADDI GPR:$rs1, (NegImm simm12Plus1:$imm))>; diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 70b9041852f91f8..de04f4c12e5e8e2 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -2443,6 +2443,15 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, return true; } +bool RISCVDAGToDAGISel::selectNonImm12(SDValue N, SDValue &Opnd) { + auto *C = dyn_cast<ConstantSDNode>(N); + if (!C || !isInt<12>(C->getSExtValue())) { + Opnd = N; + return true; + } + return false; +} + bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt) { ShAmt = N; diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h index c220b2d57c2e50f..d3d095a370683df 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -83,6 +83,8 @@ class RISCVDAGToDAGISel : public SelectionDAGISel { bool trySignedBitfieldExtract(SDNode *Node); bool tryIndexedLoad(SDNode *Node); + bool selectNonImm12(SDValue N, SDValue &Opnd); + bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt); bool selectShiftMaskXLen(SDValue N, SDValue &ShAmt) { return selectShiftMask(N, Subtarget->getXLen(), ShAmt); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index a21c3d132636bea..c20c3176bb27dbc 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -235,10 +235,7 @@ def SimmShiftRightBy3XForm : SDNodeXForm<imm, [{ }]>; // Pattern to exclude simm12 immediates from matching. 
-def non_imm12 : PatLeaf<(XLenVT GPR:$a), [{ - auto *C = dyn_cast<ConstantSDNode>(N); - return !C || !isInt<12>(C->getSExtValue()); -}]>; +def non_imm12 : ComplexPattern<XLenVT, 1, "selectNonImm12", [], [], 0>; def Shifted32OnesMask : PatLeaf<(imm), [{ uint64_t Imm = N->getZExtValue(); @@ -651,19 +648,19 @@ let Predicates = [HasStdExtZbb, IsRV64] in def : Pat<(i64 (and GPR:$rs, 0xFFFF)), (ZEXT_H_RV64 GPR:$rs)>; let Predicates = [HasStdExtZba] in { -def : Pat<(add (shl GPR:$rs1, (XLenVT 1)), non_imm12:$rs2), +def : Pat<(add (shl GPR:$rs1, (XLenVT 1)), (non_imm12 (XLenVT GPR:$rs2))), (SH1ADD GPR:$rs1, GPR:$rs2)>; -def : Pat<(add (shl GPR:$rs1, (XLenVT 2)), non_imm12:$rs2), +def : Pat<(add (shl GPR:$rs1, (XLenVT 2)), (non_imm12 (XLenVT GPR:$rs2))), (SH2ADD GPR:$rs1, GPR:$rs2)>; -def : Pat<(add (shl GPR:$rs1, (XLenVT 3)), non_imm12:$rs2), +def : Pat<(add (shl GPR:$rs1, (XLenVT 3)), (non_imm12 (XLenVT GPR:$rs2))), (SH3ADD GPR:$rs1, GPR:$rs2)>; // More complex cases use a ComplexPattern. -def : Pat<(add sh1add_op:$rs1, non_imm12:$rs2), +def : Pat<(add sh1add_op:$rs1, (non_imm12 (XLenVT GPR:$rs2))), (SH1ADD sh1add_op:$rs1, GPR:$rs2)>; -def : Pat<(add sh2add_op:$rs1, non_imm12:$rs2), +def : Pat<(add sh2add_op:$rs1, (non_imm12 (XLenVT GPR:$rs2))), (SH2ADD sh2add_op:$rs1, GPR:$rs2)>; -def : Pat<(add sh3add_op:$rs1, non_imm12:$rs2), +def : Pat<(add sh3add_op:$rs1, (non_imm12 (XLenVT GPR:$rs2))), (SH3ADD sh3add_op:$rs1, GPR:$rs2)>; def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 6)), GPR:$rs2), @@ -735,48 +732,48 @@ def : Pat<(i64 (and GPR:$rs1, Shifted32OnesMask:$mask)), (SLLI_UW (SRLI GPR:$rs1, Shifted32OnesMask:$mask), Shifted32OnesMask:$mask)>; -def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFF), non_imm12:$rs2)), +def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFF), (non_imm12 (XLenVT GPR:$rs2)))), (ADD_UW GPR:$rs1, GPR:$rs2)>; def : Pat<(i64 (and GPR:$rs, 0xFFFFFFFF)), (ADD_UW GPR:$rs, (XLenVT X0))>; -def : Pat<(i64 (or_is_add (and GPR:$rs1, 0xFFFFFFFF), non_imm12:$rs2)), +def : 
Pat<(i64 (or_is_add (and GPR:$rs1, 0xFFFFFFFF), (non_imm12 (XLenVT GPR:$rs2)))), (ADD_UW GPR:$rs1, GPR:$rs2)>; -def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 1)), non_imm12:$rs2)), +def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 1)), (non_imm12 (XLenVT GPR:$rs2)))), (SH1ADD_UW GPR:$rs1, GPR:$rs2)>; -def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 2)), non_imm12:$rs2)), +def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 2)), (non_imm12 (XLenVT GPR:$rs2)))), (SH2ADD_UW GPR:$rs1, GPR:$rs2)>; -def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 3)), non_imm12:$rs2)), +def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 3)), (non_imm12 (XLenVT GPR:$rs2)))), (SH3ADD_UW GPR:$rs1, GPR:$rs2)>; -def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 1)), 0x1FFFFFFFF), non_imm12:$rs2)), +def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 1)), 0x1FFFFFFFF), (non_imm12 (XLenVT GPR:$rs2)))), (SH1ADD_UW GPR:$rs1, GPR:$rs2)>; -def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 2)), 0x3FFFFFFFF), non_imm12:$rs2)), +def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 2)), 0x3FFFFFFFF), (non_imm12 (XLenVT GPR:$rs2)))), (SH2ADD_UW GPR:$rs1, GPR:$rs2)>; -def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 3)), 0x7FFFFFFFF), non_imm12:$rs2)), +def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 3)), 0x7FFFFFFFF), (non_imm12 (XLenVT GPR:$rs2)))), (SH3ADD_UW GPR:$rs1, GPR:$rs2)>; // More complex cases use a ComplexPattern. 
-def : Pat<(i64 (add sh1add_uw_op:$rs1, non_imm12:$rs2)), +def : Pat<(i64 (add sh1add_uw_op:$rs1, (non_imm12 (XLenVT GPR:$rs2)))), (SH1ADD_UW sh1add_uw_op:$rs1, GPR:$rs2)>; -def : Pat<(i64 (add sh2add_uw_op:$rs1, non_imm12:$rs2)), +def : Pat<(i64 (add sh2add_uw_op:$rs1, (non_imm12 (XLenVT GPR:$rs2)))), (SH2ADD_UW sh2add_uw_op:$rs1, GPR:$rs2)>; -def : Pat<(i64 (add sh3add_uw_op:$rs1, non_imm12:$rs2)), +def : Pat<(i64 (add sh3add_uw_op:$rs1, (non_imm12 (XLenVT GPR:$rs2)))), (SH3ADD_UW sh3add_uw_op:$rs1, GPR:$rs2)>; -def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFE), non_imm12:$rs2)), +def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFE), (non_imm12 (XLenVT GPR:$rs2)))), (SH1ADD (SRLIW GPR:$rs1, 1), GPR:$rs2)>; -def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFC), non_imm12:$rs2)), +def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFC), (non_imm12 (XLenVT GPR:$rs2)))), (SH2ADD (SRLIW GPR:$rs1, 2), GPR:$rs2)>; -def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFF8), non_imm12:$rs2)), +def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFF8), (non_imm12 (XLenVT GPR:$rs2)))), (SH3ADD (SRLIW GPR:$rs1, 3), GPR:$rs2)>; // Use SRLI to clear the LSBs and SHXADD_UW to mask and shift. 
-def : Pat<(i64 (add (and GPR:$rs1, 0x1FFFFFFFE), non_imm12:$rs2)), +def : Pat<(i64 (add (and GPR:$rs1, 0x1FFFFFFFE), (non_imm12 (XLenVT GPR:$rs2)))), (SH1ADD_UW (SRLI GPR:$rs1, 1), GPR:$rs2)>; -def : Pat<(i64 (add (and GPR:$rs1, 0x3FFFFFFFC), non_imm12:$rs2)), +def : Pat<(i64 (add (and GPR:$rs1, 0x3FFFFFFFC), (non_imm12 (XLenVT GPR:$rs2)))), (SH2ADD_UW (SRLI GPR:$rs1, 2), GPR:$rs2)>; -def : Pat<(i64 (add (and GPR:$rs1, 0x7FFFFFFF8), non_imm12:$rs2)), +def : Pat<(i64 (add (and GPR:$rs1, 0x7FFFFFFF8), (non_imm12 (XLenVT GPR:$rs2)))), (SH3ADD_UW (SRLI GPR:$rs1, 3), GPR:$rs2)>; def : Pat<(i64 (mul (and_oneuse GPR:$r, 0xFFFFFFFF), C3LeftShiftUW:$i)), diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/zba-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/zba-rv32.mir new file mode 100644 index 000000000000000..f90de3ea55a1bb7 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/zba-rv32.mir @@ -0,0 +1,152 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 +# RUN: llc -mtriple=riscv32 -mattr='+zba' -run-pass=instruction-select -simplify-mir -verify-machineinstrs %s -o - \ +# RUN: | FileCheck %s + +--- +name: sh1add +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: sh1add + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; CHECK-NEXT: [[SH1ADD:%[0-9]+]]:gpr = SH1ADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: $x10 = COPY [[SH1ADD]] + %0:gprb(s32) = COPY $x10 + %1:gprb(s32) = COPY $x11 + %2:gprb(s32) = G_CONSTANT i32 1 + %3:gprb(s32) = G_SHL %0, %2 + %4:gprb(s32) = G_ADD %3, %1 + $x10 = COPY %4(s32) +... 
+--- +name: sh2add +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: sh2add + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; CHECK-NEXT: [[SH2ADD:%[0-9]+]]:gpr = SH2ADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: $x10 = COPY [[SH2ADD]] + %0:gprb(s32) = COPY $x10 + %1:gprb(s32) = COPY $x11 + %2:gprb(s32) = G_CONSTANT i32 2 + %3:gprb(s32) = G_SHL %0, %2 + %4:gprb(s32) = G_ADD %3, %1 + $x10 = COPY %4(s32) +... +--- +name: sh3add +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: sh3add + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; CHECK-NEXT: [[SH3ADD:%[0-9]+]]:gpr = SH3ADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: $x10 = COPY [[SH3ADD]] + %0:gprb(s32) = COPY $x10 + %1:gprb(s32) = COPY $x11 + %2:gprb(s32) = G_CONSTANT i32 3 + %3:gprb(s32) = G_SHL %0, %2 + %4:gprb(s32) = G_ADD %3, %1 + $x10 = COPY %4(s32) +... +--- +name: no_sh1add +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: no_sh1add + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[SLLI:%[0-9]+]]:gpr = SLLI [[COPY]], 1 + ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI [[SLLI]], 37 + ; CHECK-NEXT: $x10 = COPY [[ADDI]] + %0:gprb(s32) = COPY $x10 + %1:gprb(s32) = G_CONSTANT i32 37 + %2:gprb(s32) = G_CONSTANT i32 1 + %3:gprb(s32) = G_SHL %0, %2 + %4:gprb(s32) = G_ADD %3, %1 + $x10 = COPY %4(s32) +... 
+--- +name: shXadd_complex_shl_and +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: shXadd_complex_shl_and + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; CHECK-NEXT: [[SRLI:%[0-9]+]]:gpr = SRLI [[COPY]], 1 + ; CHECK-NEXT: [[SH2ADD:%[0-9]+]]:gpr = SH2ADD [[SRLI]], [[COPY1]] + ; CHECK-NEXT: $x10 = COPY [[SH2ADD]] + %0:gprb(s32) = COPY $x10 + %1:gprb(s32) = COPY $x11 + + %2:gprb(s32) = G_CONSTANT i32 1 + %3:gprb(s32) = G_SHL %0, %2 + %4:gprb(s32) = G_CONSTANT i32 4294967292 + %5:gprb(s32) = G_AND %3, %4 + + %6:gprb(s32) = G_ADD %5, %1 + $x10 = COPY %6(s32) +... +--- +name: shXadd_complex_lshr_and +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: shXadd_complex_lshr_and + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; CHECK-NEXT: [[SRLI:%[0-9]+]]:gpr = SRLI [[COPY]], 29 + ; CHECK-NEXT: [[SH2ADD:%[0-9]+]]:gpr = SH2ADD [[SRLI]], [[COPY1]] + ; CHECK-NEXT: $x10 = COPY [[SH2ADD]] + %0:gprb(s32) = COPY $x10 + %1:gprb(s32) = COPY $x11 + + %2:gprb(s32) = G_CONSTANT i32 27 + %3:gprb(s32) = G_LSHR %0, %2 + %4:gprb(s32) = G_CONSTANT i32 60 + %5:gprb(s32) = G_AND %3, %4 + + %6:gprb(s32) = G_ADD %5, %1 + $x10 = COPY %6(s32) +... 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/zba-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/zba-rv64.mir new file mode 100644 index 000000000000000..092a3305b3453d2 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/zba-rv64.mir @@ -0,0 +1,152 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 +# RUN: llc -mtriple=riscv64 -mattr='+zba' -run-pass=instruction-select -simplify-mir -verify-machineinstrs %s -o - \ +# RUN: | FileCheck %s + +--- +name: sh1add +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: sh1add + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; CHECK-NEXT: [[SH1ADD:%[0-9]+]]:gpr = SH1ADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: $x10 = COPY [[SH1ADD]] + %0:gprb(s64) = COPY $x10 + %1:gprb(s64) = COPY $x11 + %2:gprb(s64) = G_CONSTANT i64 1 + %3:gprb(s64) = G_SHL %0, %2 + %4:gprb(s64) = G_ADD %3, %1 + $x10 = COPY %4(s64) +... +--- +name: sh2add +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: sh2add + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; CHECK-NEXT: [[SH2ADD:%[0-9]+]]:gpr = SH2ADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: $x10 = COPY [[SH2ADD]] + %0:gprb(s64) = COPY $x10 + %1:gprb(s64) = COPY $x11 + %2:gprb(s64) = G_CONSTANT i64 2 + %3:gprb(s64) = G_SHL %0, %2 + %4:gprb(s64) = G_ADD %3, %1 + $x10 = COPY %4(s64) +... 
+--- +name: sh3add +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: sh3add + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; CHECK-NEXT: [[SH3ADD:%[0-9]+]]:gpr = SH3ADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: $x10 = COPY [[SH3ADD]] + %0:gprb(s64) = COPY $x10 + %1:gprb(s64) = COPY $x11 + %2:gprb(s64) = G_CONSTANT i64 3 + %3:gprb(s64) = G_SHL %0, %2 + %4:gprb(s64) = G_ADD %3, %1 + $x10 = COPY %4(s64) +... +--- +name: no_sh1add +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: no_sh1add + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[SLLI:%[0-9]+]]:gpr = SLLI [[COPY]], 1 + ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI [[SLLI]], 37 + ; CHECK-NEXT: $x10 = COPY [[ADDI]] + %0:gprb(s64) = COPY $x10 + %1:gprb(s64) = G_CONSTANT i64 37 + %2:gprb(s64) = G_CONSTANT i64 1 + %3:gprb(s64) = G_SHL %0, %2 + %4:gprb(s64) = G_ADD %3, %1 + $x10 = COPY %4(s64) +... +--- +name: shXadd_complex_and_shl +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: shXadd_complex_and_shl + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; CHECK-NEXT: [[SRLIW:%[0-9]+]]:gpr = SRLIW [[COPY]], 1 + ; CHECK-NEXT: [[SH3ADD:%[0-9]+]]:gpr = SH3ADD [[SRLIW]], [[COPY1]] + ; CHECK-NEXT: $x10 = COPY [[SH3ADD]] + %0:gprb(s64) = COPY $x10 + %1:gprb(s64) = COPY $x11 + + %2:gprb(s64) = G_CONSTANT i64 4294967294 + %3:gprb(s64) = G_AND %0, %2 + %4:gprb(s64) = G_CONSTANT i64 2 + %5:gprb(s64) = G_SHL %3, %4 + + %6:gprb(s64) = G_ADD %5, %1 + $x10 = COPY %6(s64) +... 
+--- +name: shXadd_complex_and_lshr +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + + ; CHECK-LABEL: name: shXadd_complex_and_lshr + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; CHECK-NEXT: [[SRLIW:%[0-9]+]]:gpr = SRLIW [[COPY]], 2 + ; CHECK-NEXT: [[SH1ADD:%[0-9]+]]:gpr = SH1ADD [[SRLIW]], [[COPY1]] + ; CHECK-NEXT: $x10 = COPY [[SH1ADD]] + %0:gprb(s64) = COPY $x10 + %1:gprb(s64) = COPY $x11 + + %2:gprb(s64) = G_CONSTANT i64 4294967292 + %3:gprb(s64) = G_AND %0, %2 + %4:gprb(s64) = G_CONSTANT i64 1 + %5:gprb(s64) = G_LSHR %3, %4 + + %6:gprb(s64) = G_ADD %5, %1 + $x10 = COPY %6(s64) +... >From 4d81ad5ee98aa284487b59ea1abef5090a746b6c Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu <min....@sifive.com> Date: Fri, 29 Sep 2023 15:54:05 -0700 Subject: [PATCH 2/6] fixup! [RISCV][GISel] Add ISel supports for SHXADD from Zba extension --- llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp index 3a98e84546f376f..3be97b016f47fea 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp @@ -164,8 +164,8 @@ RISCVInstructionSelector::selectSHXADDOp(MachineOperand &Root, if (Mask.isShiftedMask()) { unsigned Leading = XLen - Mask.getActiveBits(); unsigned Trailing = Mask.countr_zero(); - // Given (and (shl y, c2), mask) in which mask has no leading zeros and c3 - // trailing zeros. We can use an SRLI by c3 - c2 followed by a SHXADD. + // Given (and (shl y, c2), mask) in which mask has no leading zeros and + // c3 trailing zeros. We can use an SRLI by c3 - c2 followed by a SHXADD. 
if (*LeftShift && Leading == 0 && C2.ult(Trailing) && Trailing == ShAmt) { Register DstReg = MRI.createGenericVirtualRegister(MRI.getType(RootReg)); >From 2d4dce18884979959ca9cdf1d99a3134e6efe6ac Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu <min....@sifive.com> Date: Mon, 2 Oct 2023 11:10:16 -0700 Subject: [PATCH 3/6] (Staging) Use GISelPredicateCode in all SHXADD patterns However, since there is a bug in llvm-tblgen that makes it crash whenever a ComplexPattern fails to be imported with `PredicateCodeUsesOperands` + `GISelPredicateCode`, we preserve the original `non_imm12` (PatLeaf) and leave all `SHXADD_UW` patterns untouched. --- llvm/lib/Target/RISCV/RISCVGISel.td | 3 - llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 91 ++++++++++++++--------- 2 files changed, 56 insertions(+), 38 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td index 2d6a293c2cca148..e0bc25c570cd209 100644 --- a/llvm/lib/Target/RISCV/RISCVGISel.td +++ b/llvm/lib/Target/RISCV/RISCVGISel.td @@ -31,9 +31,6 @@ def ShiftMaskGI : GIComplexOperandMatcher<s32, "selectShiftMask">, GIComplexPatternEquiv<shiftMaskXLen>; -def gi_non_imm12 : GIComplexOperandMatcher<s32, "selectNonImm12">, - GIComplexPatternEquiv<non_imm12>; - def gi_sh1add_op : GIComplexOperandMatcher<s32, "selectSHXADDOp<1>">, GIComplexPatternEquiv<sh1add_op>; def gi_sh2add_op : GIComplexOperandMatcher<s32, "selectSHXADDOp<2>">, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index c20c3176bb27dbc..6a1e8531c1650b2 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -235,7 +235,33 @@ def SimmShiftRightBy3XForm : SDNodeXForm<imm, [{ }]>; // Pattern to exclude simm12 immediates from matching. 
-def non_imm12 : ComplexPattern<XLenVT, 1, "selectNonImm12", [], [], 0>; +def non_imm12 : PatLeaf<(XLenVT GPR:$a), [{ + auto *C = dyn_cast<ConstantSDNode>(N); + return !C || !isInt<12>(C->getSExtValue()); +}]>; + +class binop_with_non_imm12<SDPatternOperator binop> : PatFrag<(ops node:$x, node:$y), (binop node:$x, node:$y), [{ + auto *C = dyn_cast<ConstantSDNode>(Operands[1]); + return !C || !isInt<12>(C->getSExtValue()); +}]> { + let PredicateCodeUsesOperands = 1; + let GISelPredicateCode = [{ + const MachineOperand &ImmOp = *Operands[1]; + const MachineFunction &MF = *MI.getParent()->getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + + if (ImmOp.isReg() && ImmOp.getReg()) + if (auto Val = getIConstantVRegValWithLookThrough(ImmOp.getReg(), MRI)) { + // We do NOT want immediates that fit in 12 bits. + return !isInt<12>(Val->Value.getSExtValue()); + } + + return true; + }]; +} +def add_non_imm12 : binop_with_non_imm12<add>; +def or_is_add_non_imm12 : binop_with_non_imm12<or_is_add>; + def Shifted32OnesMask : PatLeaf<(imm), [{ uint64_t Imm = N->getZExtValue(); @@ -648,20 +674,17 @@ let Predicates = [HasStdExtZbb, IsRV64] in def : Pat<(i64 (and GPR:$rs, 0xFFFF)), (ZEXT_H_RV64 GPR:$rs)>; let Predicates = [HasStdExtZba] in { -def : Pat<(add (shl GPR:$rs1, (XLenVT 1)), (non_imm12 (XLenVT GPR:$rs2))), - (SH1ADD GPR:$rs1, GPR:$rs2)>; -def : Pat<(add (shl GPR:$rs1, (XLenVT 2)), (non_imm12 (XLenVT GPR:$rs2))), - (SH2ADD GPR:$rs1, GPR:$rs2)>; -def : Pat<(add (shl GPR:$rs1, (XLenVT 3)), (non_imm12 (XLenVT GPR:$rs2))), - (SH3ADD GPR:$rs1, GPR:$rs2)>; -// More complex cases use a ComplexPattern. 
-def : Pat<(add sh1add_op:$rs1, (non_imm12 (XLenVT GPR:$rs2))), - (SH1ADD sh1add_op:$rs1, GPR:$rs2)>; -def : Pat<(add sh2add_op:$rs1, (non_imm12 (XLenVT GPR:$rs2))), - (SH2ADD sh2add_op:$rs1, GPR:$rs2)>; -def : Pat<(add sh3add_op:$rs1, (non_imm12 (XLenVT GPR:$rs2))), - (SH3ADD sh3add_op:$rs1, GPR:$rs2)>; +foreach i = {1,2,3} in { + defvar shxadd = !cast<Instruction>("SH"#i#"ADD"); + def : Pat<(XLenVT (add_non_imm12 (shl GPR:$rs1, (XLenVT i)), GPR:$rs2)), + (shxadd GPR:$rs1, GPR:$rs2)>; + + defvar pat = !cast<ComplexPattern>("sh"#i#"add_op"); + // More complex cases use a ComplexPattern. + def : Pat<(XLenVT (add_non_imm12 pat:$rs1, GPR:$rs2)), + (shxadd pat:$rs1, GPR:$rs2)>; +} def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 6)), GPR:$rs2), (SH1ADD (SH1ADD GPR:$rs1, GPR:$rs1), GPR:$rs2)>; @@ -731,49 +754,47 @@ def : Pat<(i64 (shl (and GPR:$rs1, 0xFFFFFFFF), uimm5:$shamt)), def : Pat<(i64 (and GPR:$rs1, Shifted32OnesMask:$mask)), (SLLI_UW (SRLI GPR:$rs1, Shifted32OnesMask:$mask), Shifted32OnesMask:$mask)>; - -def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFF), (non_imm12 (XLenVT GPR:$rs2)))), +def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0xFFFFFFFF), GPR:$rs2)), (ADD_UW GPR:$rs1, GPR:$rs2)>; def : Pat<(i64 (and GPR:$rs, 0xFFFFFFFF)), (ADD_UW GPR:$rs, (XLenVT X0))>; -def : Pat<(i64 (or_is_add (and GPR:$rs1, 0xFFFFFFFF), (non_imm12 (XLenVT GPR:$rs2)))), +def : Pat<(i64 (or_is_add_non_imm12 (and GPR:$rs1, 0xFFFFFFFF), GPR:$rs2)), (ADD_UW GPR:$rs1, GPR:$rs2)>; -def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 1)), (non_imm12 (XLenVT GPR:$rs2)))), - (SH1ADD_UW GPR:$rs1, GPR:$rs2)>; -def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 2)), (non_imm12 (XLenVT GPR:$rs2)))), - (SH2ADD_UW GPR:$rs1, GPR:$rs2)>; -def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 3)), (non_imm12 (XLenVT GPR:$rs2)))), - (SH3ADD_UW GPR:$rs1, GPR:$rs2)>; +foreach i = {1,2,3} in { + defvar shxadd_uw = !cast<Instruction>("SH"#i#"ADD_UW"); + def : Pat<(i64 (add_non_imm12 (shl (and 
GPR:$rs1, 0xFFFFFFFF), (i64 i)), (XLenVT GPR:$rs2))), + (shxadd_uw GPR:$rs1, GPR:$rs2)>; +} -def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 1)), 0x1FFFFFFFF), (non_imm12 (XLenVT GPR:$rs2)))), +def : Pat<(i64 (add_non_imm12 (and (shl GPR:$rs1, (i64 1)), 0x1FFFFFFFF), (XLenVT GPR:$rs2))), (SH1ADD_UW GPR:$rs1, GPR:$rs2)>; -def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 2)), 0x3FFFFFFFF), (non_imm12 (XLenVT GPR:$rs2)))), +def : Pat<(i64 (add_non_imm12 (and (shl GPR:$rs1, (i64 2)), 0x3FFFFFFFF), (XLenVT GPR:$rs2))), (SH2ADD_UW GPR:$rs1, GPR:$rs2)>; -def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 3)), 0x7FFFFFFFF), (non_imm12 (XLenVT GPR:$rs2)))), +def : Pat<(i64 (add_non_imm12 (and (shl GPR:$rs1, (i64 3)), 0x7FFFFFFFF), (XLenVT GPR:$rs2))), (SH3ADD_UW GPR:$rs1, GPR:$rs2)>; // More complex cases use a ComplexPattern. -def : Pat<(i64 (add sh1add_uw_op:$rs1, (non_imm12 (XLenVT GPR:$rs2)))), +def : Pat<(i64 (add sh1add_uw_op:$rs1, non_imm12:$rs2)), (SH1ADD_UW sh1add_uw_op:$rs1, GPR:$rs2)>; -def : Pat<(i64 (add sh2add_uw_op:$rs1, (non_imm12 (XLenVT GPR:$rs2)))), +def : Pat<(i64 (add sh2add_uw_op:$rs1, non_imm12:$rs2)), (SH2ADD_UW sh2add_uw_op:$rs1, GPR:$rs2)>; -def : Pat<(i64 (add sh3add_uw_op:$rs1, (non_imm12 (XLenVT GPR:$rs2)))), +def : Pat<(i64 (add sh3add_uw_op:$rs1, non_imm12:$rs2)), (SH3ADD_UW sh3add_uw_op:$rs1, GPR:$rs2)>; -def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFE), (non_imm12 (XLenVT GPR:$rs2)))), +def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0xFFFFFFFE), (XLenVT GPR:$rs2))), (SH1ADD (SRLIW GPR:$rs1, 1), GPR:$rs2)>; -def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFC), (non_imm12 (XLenVT GPR:$rs2)))), +def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0xFFFFFFFC), (XLenVT GPR:$rs2))), (SH2ADD (SRLIW GPR:$rs1, 2), GPR:$rs2)>; -def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFF8), (non_imm12 (XLenVT GPR:$rs2)))), +def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0xFFFFFFF8), (XLenVT GPR:$rs2))), (SH3ADD (SRLIW GPR:$rs1, 3), GPR:$rs2)>; // Use SRLI to clear the LSBs and SHXADD_UW to mask 
and shift. -def : Pat<(i64 (add (and GPR:$rs1, 0x1FFFFFFFE), (non_imm12 (XLenVT GPR:$rs2)))), +def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0x1FFFFFFFE), (XLenVT GPR:$rs2))), (SH1ADD_UW (SRLI GPR:$rs1, 1), GPR:$rs2)>; -def : Pat<(i64 (add (and GPR:$rs1, 0x3FFFFFFFC), (non_imm12 (XLenVT GPR:$rs2)))), +def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0x3FFFFFFFC), (XLenVT GPR:$rs2))), (SH2ADD_UW (SRLI GPR:$rs1, 2), GPR:$rs2)>; -def : Pat<(i64 (add (and GPR:$rs1, 0x7FFFFFFF8), (non_imm12 (XLenVT GPR:$rs2)))), +def : Pat<(i64 (add_non_imm12 (and GPR:$rs1, 0x7FFFFFFF8), (XLenVT GPR:$rs2))), (SH3ADD_UW (SRLI GPR:$rs1, 3), GPR:$rs2)>; def : Pat<(i64 (mul (and_oneuse GPR:$r, 0xFFFFFFFF), C3LeftShiftUW:$i)), >From 9de0c2f758f379c0c1a620223364433e34980b1d Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu <min....@sifive.com> Date: Tue, 3 Oct 2023 15:02:26 -0700 Subject: [PATCH 4/6] fixup! (Staging) Use GISelPredicateCode in all SHXADD patterns --- .../RISCV/GISel/RISCVInstructionSelector.cpp | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp index 3be97b016f47fea..96498d3cbab0190 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp @@ -56,8 +56,6 @@ class RISCVInstructionSelector : public InstructionSelector { ComplexRendererFns selectShiftMask(MachineOperand &Root) const; - ComplexRendererFns selectNonImm12(MachineOperand &Root) const; - ComplexRendererFns selectSHXADDOp(MachineOperand &Root, unsigned ShAmt) const; template <unsigned ShAmt> ComplexRendererFns selectSHXADDOp(MachineOperand &Root) const { @@ -114,23 +112,6 @@ RISCVInstructionSelector::selectShiftMask(MachineOperand &Root) const { return {{[=](MachineInstrBuilder &MIB) { MIB.add(Root); }}}; } -// This complex pattern actually serves as a perdicate that is effectively -// `!isInt<12>(Imm)`. 
-InstructionSelector::ComplexRendererFns -RISCVInstructionSelector::selectNonImm12(MachineOperand &Root) const { - MachineFunction &MF = *Root.getParent()->getParent()->getParent(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - - if (Root.isReg() && Root.getReg()) - if (auto Val = getIConstantVRegValWithLookThrough(Root.getReg(), MRI)) { - // We do NOT want immediates that fit in 12 bits. - if (isInt<12>(Val->Value.getSExtValue())) - return std::nullopt; - } - - return {{[=](MachineInstrBuilder &MIB) { MIB.add(Root); }}}; -} - InstructionSelector::ComplexRendererFns RISCVInstructionSelector::selectSHXADDOp(MachineOperand &Root, unsigned ShAmt) const { >From 0b2e658dcd74974518a3ad031185895dbca768e6 Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu <min....@sifive.com> Date: Tue, 3 Oct 2023 15:13:35 -0700 Subject: [PATCH 5/6] fixup! (Staging) Use GISelPredicateCode in all SHXADD patterns --- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 9 --------- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h | 2 -- llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 11 ++++++++--- 3 files changed, 8 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index de04f4c12e5e8e2..70b9041852f91f8 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -2443,15 +2443,6 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, return true; } -bool RISCVDAGToDAGISel::selectNonImm12(SDValue N, SDValue &Opnd) { - auto *C = dyn_cast<ConstantSDNode>(N); - if (!C || !isInt<12>(C->getSExtValue())) { - Opnd = N; - return true; - } - return false; -} - bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt) { ShAmt = N; diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h index d3d095a370683df..c220b2d57c2e50f 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ 
b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -83,8 +83,6 @@ class RISCVDAGToDAGISel : public SelectionDAGISel { bool trySignedBitfieldExtract(SDNode *Node); bool tryIndexedLoad(SDNode *Node); - bool selectNonImm12(SDValue N, SDValue &Opnd); - bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt); bool selectShiftMaskXLen(SDValue N, SDValue &ShAmt) { return selectShiftMask(N, Subtarget->getXLen(), ShAmt); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index 6a1e8531c1650b2..f8b4bc4945eb0a4 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -235,12 +235,18 @@ def SimmShiftRightBy3XForm : SDNodeXForm<imm, [{ }]>; // Pattern to exclude simm12 immediates from matching. +// Note: this will be removed once the GISel complex patterns for +// SHXADD_UW have landed. def non_imm12 : PatLeaf<(XLenVT GPR:$a), [{ auto *C = dyn_cast<ConstantSDNode>(N); return !C || !isInt<12>(C->getSExtValue()); }]>; -class binop_with_non_imm12<SDPatternOperator binop> : PatFrag<(ops node:$x, node:$y), (binop node:$x, node:$y), [{ +// GISel currently doesn't support PatFrag for leaf nodes, so `non_imm12` +// cannot be directly supported in GISel. To reuse patterns between the two +// ISels, we instead create PatFrag on operators that use `non_imm12`. 
+class binop_with_non_imm12<SDPatternOperator binop> + : PatFrag<(ops node:$x, node:$y), (binop node:$x, node:$y), [{ auto *C = dyn_cast<ConstantSDNode>(Operands[1]); return !C || !isInt<12>(C->getSExtValue()); }]> { @@ -259,10 +265,9 @@ class binop_with_non_imm12<SDPatternOperator binop> : PatFrag<(ops node:$x, node return true; }]; } -def add_non_imm12 : binop_with_non_imm12<add>; +def add_non_imm12 : binop_with_non_imm12<add>; def or_is_add_non_imm12 : binop_with_non_imm12<or_is_add>; - def Shifted32OnesMask : PatLeaf<(imm), [{ uint64_t Imm = N->getZExtValue(); if (!isShiftedMask_64(Imm)) >From 5484c7e950b8afcfdce9ab4841cd765751c8e4bd Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu <min....@sifive.com> Date: Wed, 18 Oct 2023 13:40:11 -0700 Subject: [PATCH 6/6] fixup! (Staging) Use GISelPredicateCode in all SHXADD patterns --- .../RISCV/GISel/RISCVInstructionSelector.cpp | 49 +++++++++---------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp index 96498d3cbab0190..0cec3a2f215e6cc 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp @@ -186,32 +186,31 @@ RISCVInstructionSelector::selectSHXADDOp(MachineOperand &Root, m_ICst(C2)))) LeftShift = false; - if (LeftShift.has_value()) - if (Mask.isShiftedMask()) { - unsigned Leading = XLen - Mask.getActiveBits(); - unsigned Trailing = Mask.countr_zero(); - - // Given (shl (and y, mask), c2) in which mask has 32 leading zeros and - // c3 trailing zeros. If c1 + c3 == ShAmt, we can emit SRLIW + SHXADD. - bool Cond = *LeftShift && Leading == 32 && Trailing > 0 && - (Trailing + C2.getLimitedValue()) == ShAmt; - if (!Cond) - // Given (lshr (and y, mask), c2) in which mask has 32 leading zeros and - // c3 trailing zeros. If c3 - c1 == ShAmt, we can emit SRLIW + SHXADD. 
- Cond = !*LeftShift && Leading == 32 && C2.ult(Trailing) && - (Trailing - C2.getLimitedValue()) == ShAmt; - - if (Cond) { - Register DstReg = - MRI.createGenericVirtualRegister(MRI.getType(RootReg)); - return {{[=](MachineInstrBuilder &MIB) { - MachineIRBuilder(*MIB.getInstr()) - .buildInstr(RISCV::SRLIW, {DstReg}, {RegY}) - .addImm(Trailing); - MIB.addReg(DstReg); - }}}; - } + if (LeftShift.has_value() && Mask.isShiftedMask()) { + unsigned Leading = XLen - Mask.getActiveBits(); + unsigned Trailing = Mask.countr_zero(); + + // Given (shl (and y, mask), c2) in which mask has 32 leading zeros and + // c3 trailing zeros. If c2 + c3 == ShAmt, we can emit SRLIW + SHXADD. + bool Cond = *LeftShift && Leading == 32 && Trailing > 0 && + (Trailing + C2.getLimitedValue()) == ShAmt; + if (!Cond) + // Given (lshr (and y, mask), c2) in which mask has 32 leading zeros and + // c3 trailing zeros. If c3 - c2 == ShAmt, we can emit SRLIW + SHXADD. + Cond = !*LeftShift && Leading == 32 && C2.ult(Trailing) && + (Trailing - C2.getLimitedValue()) == ShAmt; + + if (Cond) { + Register DstReg = + MRI.createGenericVirtualRegister(MRI.getType(RootReg)); + return {{[=](MachineInstrBuilder &MIB) { + MachineIRBuilder(*MIB.getInstr()) + .buildInstr(RISCV::SRLIW, {DstReg}, {RegY}) + .addImm(Trailing); + MIB.addReg(DstReg); + }}}; } + } return std::nullopt; } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits