https://github.com/ecnelises updated https://github.com/llvm/llvm-project/pull/67302
>From a1567f579531c3abbd1f4e9b7c7edd2f95ead42c Mon Sep 17 00:00:00 2001 From: Qiu Chaofan <qiuco...@cn.ibm.com> Date: Mon, 25 Sep 2023 17:10:51 +0800 Subject: [PATCH 1/6] [PowerPC] Implement llvm.set.rounding intrinsic According to LangRef, llvm.set.rounding sets the rounding mode from an integer argument: 0 - toward zero 1 - to nearest, ties to even 2 - toward positive infinity 3 - toward negative infinity 4 - to nearest, ties away from zero While PowerPC ISA says: 0 - to nearest 1 - toward zero 2 - toward positive infinity 3 - toward negative infinity This patch maps the argument and writes it into the last two bits of FPSCR (rounding mode). --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 80 ++++++++ llvm/lib/Target/PowerPC/PPCISelLowering.h | 1 + llvm/test/CodeGen/PowerPC/frounds.ll | 194 +++++++++++++++++++- 3 files changed, 274 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index f4e3531980d165..4e5ff0cb716966 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -426,6 +426,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom); + setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom); // If we're enabling GP optimizations, use hardware square root if (!Subtarget.hasFSQRT() && @@ -8898,6 +8899,83 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, return FP; } +SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op, + SelectionDAG &DAG) const { + SDLoc Dl(Op); + MachineFunction &MF = DAG.getMachineFunction(); + EVT PtrVT = getPointerTy(MF.getDataLayout()); + SDValue Chain = Op.getOperand(0); + + // If requested mode is constant, just use simpler mtfsb. 
+ if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + uint64_t Mode = CVal->getZExtValue(); + if (Mode >= 4) + llvm_unreachable("Unsupported rounding mode!"); + unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1); + SDNode *SetHi = DAG.getMachineNode( + (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other, + {DAG.getConstant(30, Dl, MVT::i32, true), Chain}); + SDNode *SetLo = DAG.getMachineNode( + (InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other, + {DAG.getConstant(31, Dl, MVT::i32, true), SDValue(SetHi, 0)}); + return SDValue(SetLo, 0); + } + + // Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format. + SDValue One = DAG.getConstant(1, Dl, MVT::i32); + SDValue SrcFlag = DAG.getNode(ISD::AND, Dl, MVT::i32, Op.getOperand(1), + DAG.getConstant(3, Dl, MVT::i32)); + SDValue DstFlag = DAG.getNode( + ISD::XOR, Dl, MVT::i32, SrcFlag, + DAG.getNode(ISD::AND, Dl, MVT::i32, + DAG.getNOT(Dl, + DAG.getNode(ISD::SRL, Dl, MVT::i32, SrcFlag, One), + MVT::i32), + One)); + SDValue MFFS = DAG.getNode(PPCISD::MFFS, Dl, {MVT::f64, MVT::Other}, Chain); + Chain = MFFS.getValue(1); + SDValue NewFPSCR; + if (isTypeLegal(MVT::i64)) { + // Set the last two bits (rounding mode) of bitcasted FPSCR. + NewFPSCR = DAG.getNode( + ISD::OR, Dl, MVT::i64, + DAG.getNode(ISD::AND, Dl, MVT::i64, + DAG.getNode(ISD::BITCAST, Dl, MVT::i64, MFFS), + DAG.getNOT(Dl, DAG.getConstant(3, Dl, MVT::i64), MVT::i64)), + DAG.getNode(ISD::ZERO_EXTEND, Dl, MVT::i64, DstFlag)); + NewFPSCR = DAG.getNode(ISD::BITCAST, Dl, MVT::f64, NewFPSCR); + } else { + // In 32-bit mode, store f64, load and update the lower half. 
+ int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false); + SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); + Chain = DAG.getStore(Chain, Dl, MFFS, StackSlot, MachinePointerInfo()); + SDValue Addr; + if (Subtarget.isLittleEndian()) + Addr = StackSlot; + else + Addr = DAG.getNode(ISD::ADD, Dl, PtrVT, StackSlot, + DAG.getConstant(4, Dl, PtrVT)); + SDValue Tmp = DAG.getLoad(MVT::i32, Dl, Chain, Addr, MachinePointerInfo()); + Chain = Tmp.getValue(1); + + Tmp = DAG.getNode( + ISD::OR, Dl, MVT::i32, + DAG.getNode(ISD::AND, Dl, MVT::i32, Tmp, + DAG.getNOT(Dl, DAG.getConstant(3, Dl, MVT::i32), MVT::i32)), + DstFlag); + + Chain = DAG.getStore(Chain, Dl, Tmp, Addr, MachinePointerInfo()); + NewFPSCR = + DAG.getLoad(MVT::f64, Dl, Chain, StackSlot, MachinePointerInfo()); + Chain = NewFPSCR.getValue(1); + } + SDValue Zero = DAG.getConstant(0, Dl, MVT::i32, true); + SDNode *MTFSF = DAG.getMachineNode( + PPC::MTFSF, Dl, MVT::Other, + {DAG.getConstant(255, Dl, MVT::i32, true), NewFPSCR, Zero, Zero, Chain}); + return SDValue(MTFSF, 0); +} + SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); @@ -11647,6 +11725,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG); case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG); + case ISD::SET_ROUNDING: + return LowerSET_ROUNDING(Op, DAG); // Lower 64-bit shifts. 
case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 7c62e370f1536a..6c197327593f0f 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1286,6 +1286,7 @@ namespace llvm { const SDLoc &dl) const; SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/PowerPC/frounds.ll b/llvm/test/CodeGen/PowerPC/frounds.ll index c1f7181b30f3f6..1944af1687d0ef 100644 --- a/llvm/test/CodeGen/PowerPC/frounds.ll +++ b/llvm/test/CodeGen/PowerPC/frounds.ll @@ -77,4 +77,196 @@ return: ; preds = %entry ret i32 %retval3 } -declare i32 @llvm.get.rounding() nounwind +define void @setrnd_tozero() { +; PPC32-LABEL: setrnd_tozero: +; PPC32: # %bb.0: # %entry +; PPC32-NEXT: mtfsb0 30 +; PPC32-NEXT: mtfsb1 31 +; PPC32-NEXT: blr +; +; PPC64-LABEL: setrnd_tozero: +; PPC64: # %bb.0: # %entry +; PPC64-NEXT: mtfsb0 30 +; PPC64-NEXT: mtfsb1 31 +; PPC64-NEXT: blr +; +; PPC64LE-LABEL: setrnd_tozero: +; PPC64LE: # %bb.0: # %entry +; PPC64LE-NEXT: mtfsb0 30 +; PPC64LE-NEXT: mtfsb1 31 +; PPC64LE-NEXT: blr +; +; DM-LABEL: setrnd_tozero: +; DM: # %bb.0: # %entry +; DM-NEXT: mtfsb0 30 +; DM-NEXT: mtfsb1 31 +; DM-NEXT: blr +entry: + call void @llvm.set.rounding(i32 0) + ret void +} + +define void @setrnd_tonearest_tieeven() { +; PPC32-LABEL: setrnd_tonearest_tieeven: +; PPC32: # %bb.0: # %entry +; PPC32-NEXT: mtfsb0 30 +; PPC32-NEXT: mtfsb0 31 +; PPC32-NEXT: blr +; +; PPC64-LABEL: setrnd_tonearest_tieeven: +; PPC64: # %bb.0: # %entry +; PPC64-NEXT: mtfsb0 30 +; PPC64-NEXT: mtfsb0 31 +; PPC64-NEXT: blr +; +; 
PPC64LE-LABEL: setrnd_tonearest_tieeven: +; PPC64LE: # %bb.0: # %entry +; PPC64LE-NEXT: mtfsb0 30 +; PPC64LE-NEXT: mtfsb0 31 +; PPC64LE-NEXT: blr +; +; DM-LABEL: setrnd_tonearest_tieeven: +; DM: # %bb.0: # %entry +; DM-NEXT: mtfsb0 30 +; DM-NEXT: mtfsb0 31 +; DM-NEXT: blr +entry: + call void @llvm.set.rounding(i32 1) + ret void +} + +define void @setrnd_toposinf() { +; PPC32-LABEL: setrnd_toposinf: +; PPC32: # %bb.0: # %entry +; PPC32-NEXT: mtfsb1 30 +; PPC32-NEXT: mtfsb0 31 +; PPC32-NEXT: blr +; +; PPC64-LABEL: setrnd_toposinf: +; PPC64: # %bb.0: # %entry +; PPC64-NEXT: mtfsb1 30 +; PPC64-NEXT: mtfsb0 31 +; PPC64-NEXT: blr +; +; PPC64LE-LABEL: setrnd_toposinf: +; PPC64LE: # %bb.0: # %entry +; PPC64LE-NEXT: mtfsb1 30 +; PPC64LE-NEXT: mtfsb0 31 +; PPC64LE-NEXT: blr +; +; DM-LABEL: setrnd_toposinf: +; DM: # %bb.0: # %entry +; DM-NEXT: mtfsb1 30 +; DM-NEXT: mtfsb0 31 +; DM-NEXT: blr +entry: + call void @llvm.set.rounding(i32 2) + ret void +} + +define void @setrnd_toneginf() { +; PPC32-LABEL: setrnd_toneginf: +; PPC32: # %bb.0: # %entry +; PPC32-NEXT: mtfsb1 30 +; PPC32-NEXT: mtfsb1 31 +; PPC32-NEXT: blr +; +; PPC64-LABEL: setrnd_toneginf: +; PPC64: # %bb.0: # %entry +; PPC64-NEXT: mtfsb1 30 +; PPC64-NEXT: mtfsb1 31 +; PPC64-NEXT: blr +; +; PPC64LE-LABEL: setrnd_toneginf: +; PPC64LE: # %bb.0: # %entry +; PPC64LE-NEXT: mtfsb1 30 +; PPC64LE-NEXT: mtfsb1 31 +; PPC64LE-NEXT: blr +; +; DM-LABEL: setrnd_toneginf: +; DM: # %bb.0: # %entry +; DM-NEXT: mtfsb1 30 +; DM-NEXT: mtfsb1 31 +; DM-NEXT: blr +entry: + call void @llvm.set.rounding(i32 3) + ret void +} + +define void @setrnd_var(i32 %x) { +; PPC32-LABEL: setrnd_var: +; PPC32: # %bb.0: # %entry +; PPC32-NEXT: stwu 1, -16(1) +; PPC32-NEXT: .cfi_def_cfa_offset 16 +; PPC32-NEXT: mffs 0 +; PPC32-NEXT: stfd 0, 8(1) +; PPC32-NEXT: clrlwi 4, 3, 30 +; PPC32-NEXT: lwz 5, 12(1) +; PPC32-NEXT: rlwinm 3, 3, 31, 31, 31 +; PPC32-NEXT: xor 3, 3, 4 +; PPC32-NEXT: xori 3, 3, 1 +; PPC32-NEXT: rlwinm 4, 5, 0, 0, 29 +; PPC32-NEXT: rlwimi 4, 
3, 0, 30, 31 +; PPC32-NEXT: stw 4, 12(1) +; PPC32-NEXT: lfd 0, 8(1) +; PPC32-NEXT: mtfsf 255, 0 +; PPC32-NEXT: addi 1, 1, 16 +; PPC32-NEXT: blr +; +; PPC64-LABEL: setrnd_var: +; PPC64: # %bb.0: # %entry +; PPC64-NEXT: mffs 0 +; PPC64-NEXT: stfd 0, -16(1) +; PPC64-NEXT: ld 5, -16(1) +; PPC64-NEXT: clrlwi 4, 3, 30 +; PPC64-NEXT: rlwinm 3, 3, 31, 31, 31 +; PPC64-NEXT: xor 3, 3, 4 +; PPC64-NEXT: xori 3, 3, 1 +; PPC64-NEXT: clrldi 3, 3, 32 +; PPC64-NEXT: rldicr 4, 5, 0, 61 +; PPC64-NEXT: or 3, 4, 3 +; PPC64-NEXT: std 3, -8(1) +; PPC64-NEXT: lfd 0, -8(1) +; PPC64-NEXT: mtfsf 255, 0 +; PPC64-NEXT: blr +; +; PPC64LE-LABEL: setrnd_var: +; PPC64LE: # %bb.0: # %entry +; PPC64LE-NEXT: mffs 0 +; PPC64LE-NEXT: stfd 0, -16(1) +; PPC64LE-NEXT: clrlwi 4, 3, 30 +; PPC64LE-NEXT: rlwinm 3, 3, 31, 31, 31 +; PPC64LE-NEXT: xor 3, 3, 4 +; PPC64LE-NEXT: ld 4, -16(1) +; PPC64LE-NEXT: xori 3, 3, 1 +; PPC64LE-NEXT: clrldi 3, 3, 32 +; PPC64LE-NEXT: rldicr 4, 4, 0, 61 +; PPC64LE-NEXT: or 3, 4, 3 +; PPC64LE-NEXT: std 3, -8(1) +; PPC64LE-NEXT: lfd 0, -8(1) +; PPC64LE-NEXT: mtfsf 255, 0 +; PPC64LE-NEXT: blr +; +; DM-LABEL: setrnd_var: +; DM: # %bb.0: # %entry +; DM-NEXT: clrlwi 4, 3, 30 +; DM-NEXT: rlwinm 3, 3, 31, 31, 31 +; DM-NEXT: xor 3, 3, 4 +; DM-NEXT: xori 3, 3, 1 +; DM-NEXT: clrldi 3, 3, 32 +; DM-NEXT: mffs 0 +; DM-NEXT: mffprd 4, 0 +; DM-NEXT: rldicr 4, 4, 0, 61 +; DM-NEXT: or 3, 4, 3 +; DM-NEXT: mtfprd 0, 3 +; DM-NEXT: mtfsf 255, 0 +; DM-NEXT: blr +entry: + call void @llvm.set.rounding(i32 %x) + ret void +} + +declare i32 @llvm.get.rounding() #0 +declare void @llvm.set.rounding(i32) #0 + +attributes #0 = { nounwind } >From f1c1a5c14147c69a5e2731e8ebe0febad6a12c4a Mon Sep 17 00:00:00 2001 From: Qiu Chaofan <qiuco...@cn.ibm.com> Date: Thu, 16 Nov 2023 15:25:08 +0800 Subject: [PATCH 2/6] Exclude SPE --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 5 ++--- llvm/test/CodeGen/PowerPC/frounds.ll | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git 
a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 5efc9ba487a710..3be34f89c51d04 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -420,14 +420,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, } else { setOperationAction(ISD::FMA , MVT::f64, Legal); setOperationAction(ISD::FMA , MVT::f32, Legal); + setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom); + setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom); } if (Subtarget.hasSPE()) setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); - setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom); - setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom); - // If we're enabling GP optimizations, use hardware square root if (!Subtarget.hasFSQRT() && !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() && diff --git a/llvm/test/CodeGen/PowerPC/frounds.ll b/llvm/test/CodeGen/PowerPC/frounds.ll index 1944af1687d0ef..538de2938aba81 100644 --- a/llvm/test/CodeGen/PowerPC/frounds.ll +++ b/llvm/test/CodeGen/PowerPC/frounds.ll @@ -233,14 +233,14 @@ define void @setrnd_var(i32 %x) { ; PPC64LE-LABEL: setrnd_var: ; PPC64LE: # %bb.0: # %entry ; PPC64LE-NEXT: mffs 0 -; PPC64LE-NEXT: stfd 0, -16(1) ; PPC64LE-NEXT: clrlwi 4, 3, 30 ; PPC64LE-NEXT: rlwinm 3, 3, 31, 31, 31 +; PPC64LE-NEXT: stfd 0, -16(1) ; PPC64LE-NEXT: xor 3, 3, 4 ; PPC64LE-NEXT: ld 4, -16(1) ; PPC64LE-NEXT: xori 3, 3, 1 -; PPC64LE-NEXT: clrldi 3, 3, 32 ; PPC64LE-NEXT: rldicr 4, 4, 0, 61 +; PPC64LE-NEXT: clrldi 3, 3, 32 ; PPC64LE-NEXT: or 3, 4, 3 ; PPC64LE-NEXT: std 3, -8(1) ; PPC64LE-NEXT: lfd 0, -8(1) >From a2c14908060849a83b0ac000c96fa6a9251e811b Mon Sep 17 00:00:00 2001 From: Qiu Chaofan <qiuco...@cn.ibm.com> Date: Mon, 27 Nov 2023 17:42:07 +0800 Subject: [PATCH 3/6] Use assert instead of unreachable --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git 
a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 8cc794ad375de6..32faa60970605a 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -8910,8 +8910,7 @@ SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op, // If requested mode is constant, just use simpler mtfsb. if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { uint64_t Mode = CVal->getZExtValue(); - if (Mode >= 4) - llvm_unreachable("Unsupported rounding mode!"); + assert(Model < 4 && "Unsupported rounding mode!"); unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1); SDNode *SetHi = DAG.getMachineNode( (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other, >From 00a5ae8fac889f14ead068f0259540788bd125fb Mon Sep 17 00:00:00 2001 From: Qiu Chaofan <qiuco...@cn.ibm.com> Date: Mon, 27 Nov 2023 17:49:49 +0800 Subject: [PATCH 4/6] Fixup --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 32faa60970605a..c56d8d65c75bdd 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -8910,7 +8910,7 @@ SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op, // If requested mode is constant, just use simpler mtfsb. if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { uint64_t Mode = CVal->getZExtValue(); - assert(Model < 4 && "Unsupported rounding mode!"); + assert(Mode < 4 && "Unsupported rounding mode!"); unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1); SDNode *SetHi = DAG.getMachineNode( (InternalRnd & 2) ? 
PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other, >From 228d1845d9bf407712ea251ff350f7d6a952b2f2 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan <qiuco...@cn.ibm.com> Date: Wed, 10 Jan 2024 17:19:47 +0800 Subject: [PATCH 5/6] Exploit P9 mffscrn --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 76 +++++++++----- llvm/test/CodeGen/PowerPC/frounds.ll | 108 ++++++++++++++++++-- 2 files changed, 149 insertions(+), 35 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 454e3c78cafa9e..16dcb973d6c967 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -8906,11 +8906,17 @@ SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op, EVT PtrVT = getPointerTy(MF.getDataLayout()); SDValue Chain = Op.getOperand(0); - // If requested mode is constant, just use simpler mtfsb. + // If requested mode is constant, just use simpler mtfsb/mffscrni if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { uint64_t Mode = CVal->getZExtValue(); assert(Mode < 4 && "Unsupported rounding mode!"); unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1); + if (Subtarget.isISA3_0()) + return SDValue( + DAG.getMachineNode( + PPC::MFFSCRNI, Dl, {MVT::f64, MVT::Other}, + {DAG.getConstant(InternalRnd, Dl, MVT::i32, true), Chain}), + 1); SDNode *SetHi = DAG.getMachineNode( (InternalRnd & 2) ? 
PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other, {DAG.getConstant(30, Dl, MVT::i32, true), Chain}); @@ -8931,43 +8937,59 @@ SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op, DAG.getNode(ISD::SRL, Dl, MVT::i32, SrcFlag, One), MVT::i32), One)); - SDValue MFFS = DAG.getNode(PPCISD::MFFS, Dl, {MVT::f64, MVT::Other}, Chain); - Chain = MFFS.getValue(1); + // For Power9, there's faster mffscrn, and we don't need to read FPSCR + SDValue MFFS; + if (!Subtarget.isISA3_0()) { + MFFS = DAG.getNode(PPCISD::MFFS, Dl, {MVT::f64, MVT::Other}, Chain); + Chain = MFFS.getValue(1); + } SDValue NewFPSCR; if (isTypeLegal(MVT::i64)) { - // Set the last two bits (rounding mode) of bitcasted FPSCR. - NewFPSCR = DAG.getNode( - ISD::OR, Dl, MVT::i64, - DAG.getNode(ISD::AND, Dl, MVT::i64, - DAG.getNode(ISD::BITCAST, Dl, MVT::i64, MFFS), - DAG.getNOT(Dl, DAG.getConstant(3, Dl, MVT::i64), MVT::i64)), - DAG.getNode(ISD::ZERO_EXTEND, Dl, MVT::i64, DstFlag)); + if (Subtarget.isISA3_0()) + NewFPSCR = DAG.getAnyExtOrTrunc(DstFlag, Dl, MVT::i64); + else + // Set the last two bits (rounding mode) of bitcasted FPSCR. + NewFPSCR = DAG.getNode( + ISD::OR, Dl, MVT::i64, + DAG.getNode( + ISD::AND, Dl, MVT::i64, + DAG.getNode(ISD::BITCAST, Dl, MVT::i64, MFFS), + DAG.getNOT(Dl, DAG.getConstant(3, Dl, MVT::i64), MVT::i64)), + DAG.getNode(ISD::ZERO_EXTEND, Dl, MVT::i64, DstFlag)); NewFPSCR = DAG.getNode(ISD::BITCAST, Dl, MVT::f64, NewFPSCR); } else { // In 32-bit mode, store f64, load and update the lower half. 
int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false); SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); - Chain = DAG.getStore(Chain, Dl, MFFS, StackSlot, MachinePointerInfo()); - SDValue Addr; - if (Subtarget.isLittleEndian()) - Addr = StackSlot; - else - Addr = DAG.getNode(ISD::ADD, Dl, PtrVT, StackSlot, - DAG.getConstant(4, Dl, PtrVT)); - SDValue Tmp = DAG.getLoad(MVT::i32, Dl, Chain, Addr, MachinePointerInfo()); - Chain = Tmp.getValue(1); - - Tmp = DAG.getNode( - ISD::OR, Dl, MVT::i32, - DAG.getNode(ISD::AND, Dl, MVT::i32, Tmp, - DAG.getNOT(Dl, DAG.getConstant(3, Dl, MVT::i32), MVT::i32)), - DstFlag); - - Chain = DAG.getStore(Chain, Dl, Tmp, Addr, MachinePointerInfo()); + SDValue Addr = Subtarget.isLittleEndian() + ? StackSlot + : DAG.getNode(ISD::ADD, Dl, PtrVT, StackSlot, + DAG.getConstant(4, Dl, PtrVT)); + if (Subtarget.isISA3_0()) { + Chain = DAG.getStore(Chain, Dl, DstFlag, Addr, MachinePointerInfo()); + } else { + Chain = DAG.getStore(Chain, Dl, MFFS, StackSlot, MachinePointerInfo()); + SDValue Tmp = + DAG.getLoad(MVT::i32, Dl, Chain, Addr, MachinePointerInfo()); + Chain = Tmp.getValue(1); + + Tmp = DAG.getNode( + ISD::OR, Dl, MVT::i32, + DAG.getNode( + ISD::AND, Dl, MVT::i32, Tmp, + DAG.getNOT(Dl, DAG.getConstant(3, Dl, MVT::i32), MVT::i32)), + DstFlag); + + Chain = DAG.getStore(Chain, Dl, Tmp, Addr, MachinePointerInfo()); + } NewFPSCR = DAG.getLoad(MVT::f64, Dl, Chain, StackSlot, MachinePointerInfo()); Chain = NewFPSCR.getValue(1); } + if (Subtarget.isISA3_0()) + return SDValue(DAG.getMachineNode(PPC::MFFSCRN, Dl, {MVT::f64, MVT::Other}, + {NewFPSCR, Chain}), + 1); SDValue Zero = DAG.getConstant(0, Dl, MVT::i32, true); SDNode *MTFSF = DAG.getMachineNode( PPC::MTFSF, Dl, MVT::Other, diff --git a/llvm/test/CodeGen/PowerPC/frounds.ll b/llvm/test/CodeGen/PowerPC/frounds.ll index 538de2938aba81..de94aab839f8bc 100644 --- a/llvm/test/CodeGen/PowerPC/frounds.ll +++ b/llvm/test/CodeGen/PowerPC/frounds.ll @@ -5,14 +5,17 @@ ; RUN: 
-check-prefix=PPC64 ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le -mattr=-direct-move \ ; RUN: | FileCheck %s -check-prefix=PPC64LE +; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -mcpu=pwr9 \ +; RUN: | FileCheck %s -check-prefix=P9_32 +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le -mcpu=pwr9 \ +; RUN: | FileCheck %s -check-prefix=P9 ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le | FileCheck %s \ ; RUN: -check-prefix=DM -define i32 @foo() { +define i32 @foo() #0 { ; PPC32-LABEL: foo: ; PPC32: # %bb.0: # %entry ; PPC32-NEXT: stwu 1, -32(1) -; PPC32-NEXT: .cfi_def_cfa_offset 32 ; PPC32-NEXT: mffs 0 ; PPC32-NEXT: stfd 0, 16(1) ; PPC32-NEXT: lwz 3, 20(1) @@ -51,6 +54,33 @@ define i32 @foo() { ; PPC64LE-NEXT: stw 3, -4(1) ; PPC64LE-NEXT: blr ; +; P9_32-LABEL: foo: +; P9_32: # %bb.0: # %entry +; P9_32-NEXT: stwu 1, -32(1) +; P9_32-NEXT: mffs 0 +; P9_32-NEXT: stfd 0, 16(1) +; P9_32-NEXT: lwz 3, 20(1) +; P9_32-NEXT: clrlwi 4, 3, 30 +; P9_32-NEXT: not 3, 3 +; P9_32-NEXT: rlwinm 3, 3, 31, 31, 31 +; P9_32-NEXT: xor 3, 4, 3 +; P9_32-NEXT: stw 3, 24(1) +; P9_32-NEXT: stw 3, 28(1) +; P9_32-NEXT: addi 1, 1, 32 +; P9_32-NEXT: blr +; +; P9-LABEL: foo: +; P9: # %bb.0: # %entry +; P9-NEXT: mffs 0 +; P9-NEXT: mffprd 3, 0 +; P9-NEXT: clrlwi 4, 3, 30 +; P9-NEXT: not 3, 3 +; P9-NEXT: rlwinm 3, 3, 31, 31, 31 +; P9-NEXT: xor 3, 4, 3 +; P9-NEXT: stw 3, -8(1) +; P9-NEXT: stw 3, -4(1) +; P9-NEXT: blr +; ; DM-LABEL: foo: ; DM: # %bb.0: # %entry ; DM-NEXT: mffs 0 @@ -77,7 +107,7 @@ return: ; preds = %entry ret i32 %retval3 } -define void @setrnd_tozero() { +define void @setrnd_tozero() #0 { ; PPC32-LABEL: setrnd_tozero: ; PPC32: # %bb.0: # %entry ; PPC32-NEXT: mtfsb0 30 @@ -96,6 +126,16 @@ define void @setrnd_tozero() { ; PPC64LE-NEXT: mtfsb1 31 ; PPC64LE-NEXT: blr ; +; P9_32-LABEL: setrnd_tozero: +; P9_32: # %bb.0: # %entry +; P9_32-NEXT: mffscrni 0, 1 +; P9_32-NEXT: blr +; +; P9-LABEL: setrnd_tozero: +; P9: # %bb.0: # %entry +; P9-NEXT: mffscrni 
0, 1 +; P9-NEXT: blr +; ; DM-LABEL: setrnd_tozero: ; DM: # %bb.0: # %entry ; DM-NEXT: mtfsb0 30 @@ -106,7 +146,7 @@ entry: ret void } -define void @setrnd_tonearest_tieeven() { +define void @setrnd_tonearest_tieeven() #0 { ; PPC32-LABEL: setrnd_tonearest_tieeven: ; PPC32: # %bb.0: # %entry ; PPC32-NEXT: mtfsb0 30 @@ -125,6 +165,16 @@ define void @setrnd_tonearest_tieeven() { ; PPC64LE-NEXT: mtfsb0 31 ; PPC64LE-NEXT: blr ; +; P9_32-LABEL: setrnd_tonearest_tieeven: +; P9_32: # %bb.0: # %entry +; P9_32-NEXT: mffscrni 0, 0 +; P9_32-NEXT: blr +; +; P9-LABEL: setrnd_tonearest_tieeven: +; P9: # %bb.0: # %entry +; P9-NEXT: mffscrni 0, 0 +; P9-NEXT: blr +; ; DM-LABEL: setrnd_tonearest_tieeven: ; DM: # %bb.0: # %entry ; DM-NEXT: mtfsb0 30 @@ -135,7 +185,7 @@ entry: ret void } -define void @setrnd_toposinf() { +define void @setrnd_toposinf() #0 { ; PPC32-LABEL: setrnd_toposinf: ; PPC32: # %bb.0: # %entry ; PPC32-NEXT: mtfsb1 30 @@ -154,6 +204,16 @@ define void @setrnd_toposinf() { ; PPC64LE-NEXT: mtfsb0 31 ; PPC64LE-NEXT: blr ; +; P9_32-LABEL: setrnd_toposinf: +; P9_32: # %bb.0: # %entry +; P9_32-NEXT: mffscrni 0, 2 +; P9_32-NEXT: blr +; +; P9-LABEL: setrnd_toposinf: +; P9: # %bb.0: # %entry +; P9-NEXT: mffscrni 0, 2 +; P9-NEXT: blr +; ; DM-LABEL: setrnd_toposinf: ; DM: # %bb.0: # %entry ; DM-NEXT: mtfsb1 30 @@ -164,7 +224,7 @@ entry: ret void } -define void @setrnd_toneginf() { +define void @setrnd_toneginf() #0 { ; PPC32-LABEL: setrnd_toneginf: ; PPC32: # %bb.0: # %entry ; PPC32-NEXT: mtfsb1 30 @@ -183,6 +243,16 @@ define void @setrnd_toneginf() { ; PPC64LE-NEXT: mtfsb1 31 ; PPC64LE-NEXT: blr ; +; P9_32-LABEL: setrnd_toneginf: +; P9_32: # %bb.0: # %entry +; P9_32-NEXT: mffscrni 0, 3 +; P9_32-NEXT: blr +; +; P9-LABEL: setrnd_toneginf: +; P9: # %bb.0: # %entry +; P9-NEXT: mffscrni 0, 3 +; P9-NEXT: blr +; ; DM-LABEL: setrnd_toneginf: ; DM: # %bb.0: # %entry ; DM-NEXT: mtfsb1 30 @@ -193,11 +263,10 @@ entry: ret void } -define void @setrnd_var(i32 %x) { +define void 
@setrnd_var(i32 %x) #0 { ; PPC32-LABEL: setrnd_var: ; PPC32: # %bb.0: # %entry ; PPC32-NEXT: stwu 1, -16(1) -; PPC32-NEXT: .cfi_def_cfa_offset 16 ; PPC32-NEXT: mffs 0 ; PPC32-NEXT: stfd 0, 8(1) ; PPC32-NEXT: clrlwi 4, 3, 30 @@ -247,6 +316,29 @@ define void @setrnd_var(i32 %x) { ; PPC64LE-NEXT: mtfsf 255, 0 ; PPC64LE-NEXT: blr ; +; P9_32-LABEL: setrnd_var: +; P9_32: # %bb.0: # %entry +; P9_32-NEXT: stwu 1, -16(1) +; P9_32-NEXT: clrlwi 4, 3, 30 +; P9_32-NEXT: rlwinm 3, 3, 31, 31, 31 +; P9_32-NEXT: xor 3, 3, 4 +; P9_32-NEXT: xori 3, 3, 1 +; P9_32-NEXT: stw 3, 12(1) +; P9_32-NEXT: lfd 0, 8(1) +; P9_32-NEXT: mffscrn 0, 0 +; P9_32-NEXT: addi 1, 1, 16 +; P9_32-NEXT: blr +; +; P9-LABEL: setrnd_var: +; P9: # %bb.0: # %entry +; P9-NEXT: clrlwi 4, 3, 30 +; P9-NEXT: rlwinm 3, 3, 31, 31, 31 +; P9-NEXT: xor 3, 3, 4 +; P9-NEXT: xori 3, 3, 1 +; P9-NEXT: mtfprd 0, 3 +; P9-NEXT: mffscrn 0, 0 +; P9-NEXT: blr +; ; DM-LABEL: setrnd_var: ; DM: # %bb.0: # %entry ; DM-NEXT: clrlwi 4, 3, 30 >From 9636dea38f886bbd9c4f197fbfa343ea59a88b12 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan <qiuco...@cn.ibm.com> Date: Wed, 10 Jan 2024 17:27:58 +0800 Subject: [PATCH 6/6] Address comments --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 16dcb973d6c967..70ab531a6ee07a 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -8944,7 +8944,7 @@ SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op, Chain = MFFS.getValue(1); } SDValue NewFPSCR; - if (isTypeLegal(MVT::i64)) { + if (Subtarget.isPPC64()) { if (Subtarget.isISA3_0()) NewFPSCR = DAG.getAnyExtOrTrunc(DstFlag, Dl, MVT::i64); else _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits