Author: hans Date: Mon Jul 20 11:41:33 2015 New Revision: 242683 URL: http://llvm.org/viewvc/llvm-project?rev=242683&view=rev Log: Merging r242433: ------------------------------------------------------------------------ r242433 | tstellar | 2015-07-16 12:40:07 -0700 (Thu, 16 Jul 2015) | 11 lines
AMDPGU/SI: Use AssertZext node to mask high bit for scratch offsets Summary: We can safely assume that the high bit of scratch offsets will never be set, because this would require at least 128 GB of GPU memory. Reviewers: arsenm Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D11225 ------------------------------------------------------------------------ Modified: llvm/branches/release_37/ (props changed) llvm/branches/release_37/lib/Target/AMDGPU/AMDGPU.td llvm/branches/release_37/lib/Target/AMDGPU/AMDGPUSubtarget.cpp llvm/branches/release_37/lib/Target/AMDGPU/AMDGPUSubtarget.h llvm/branches/release_37/lib/Target/AMDGPU/SIISelLowering.cpp Propchange: llvm/branches/release_37/ ------------------------------------------------------------------------------ --- svn:mergeinfo (original) +++ svn:mergeinfo Mon Jul 20 11:41:33 2015 @@ -1,3 +1,3 @@ /llvm/branches/Apple/Pertwee:110850,110961 /llvm/branches/type-system-rewrite:133420-134817 -/llvm/trunk:155241,242236,242239,242281,242288,242296,242331,242341,242410,242412,242442,242543 +/llvm/trunk:155241,242236,242239,242281,242288,242296,242331,242341,242410,242412,242433,242442,242543 Modified: llvm/branches/release_37/lib/Target/AMDGPU/AMDGPU.td URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_37/lib/Target/AMDGPU/AMDGPU.td?rev=242683&r1=242682&r2=242683&view=diff ============================================================================== --- llvm/branches/release_37/lib/Target/AMDGPU/AMDGPU.td (original) +++ llvm/branches/release_37/lib/Target/AMDGPU/AMDGPU.td Mon Jul 20 11:41:33 2015 @@ -123,6 +123,11 @@ def FeatureSGPRInitBug : SubtargetFeatur "true", "VI SGPR initilization bug requiring a fixed SGPR allocation size">; +def FeatureEnableHugeScratchBuffer : SubtargetFeature<"huge-scratch-buffer", + "EnableHugeScratchBuffer", + "true", + "Enable scratch buffer sizes greater than 128 GB">; + class SubtargetFeatureFetchLimit <string Value> : SubtargetFeature <"fetch"#Value, 
"TexVTXClauseSize", Modified: llvm/branches/release_37/lib/Target/AMDGPU/AMDGPUSubtarget.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_37/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=242683&r1=242682&r2=242683&view=diff ============================================================================== --- llvm/branches/release_37/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original) +++ llvm/branches/release_37/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Mon Jul 20 11:41:33 2015 @@ -73,7 +73,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const T WavefrontSize(0), CFALUBug(false), LocalMemorySize(0), EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false), GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0), - IsaVersion(ISAVersion0_0_0), + IsaVersion(ISAVersion0_0_0), EnableHugeScratchBuffer(false), FrameLowering(TargetFrameLowering::StackGrowsUp, 64 * 16, // Maximum stack alignment (long16) 0), Modified: llvm/branches/release_37/lib/Target/AMDGPU/AMDGPUSubtarget.h URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_37/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=242683&r1=242682&r2=242683&view=diff ============================================================================== --- llvm/branches/release_37/lib/Target/AMDGPU/AMDGPUSubtarget.h (original) +++ llvm/branches/release_37/lib/Target/AMDGPU/AMDGPUSubtarget.h Mon Jul 20 11:41:33 2015 @@ -89,6 +89,7 @@ private: bool FeatureDisable; int LDSBankCount; unsigned IsaVersion; + bool EnableHugeScratchBuffer; AMDGPUFrameLowering FrameLowering; std::unique_ptr<AMDGPUTargetLowering> TLInfo; @@ -271,6 +272,10 @@ public: return DevName; } + bool enableHugeScratchBuffer() const { + return EnableHugeScratchBuffer; + } + bool dumpCode() const { return DumpCode; } Modified: llvm/branches/release_37/lib/Target/AMDGPU/SIISelLowering.cpp URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_37/lib/Target/AMDGPU/SIISelLowering.cpp?rev=242683&r1=242682&r2=242683&view=diff 
============================================================================== --- llvm/branches/release_37/lib/Target/AMDGPU/SIISelLowering.cpp (original) +++ llvm/branches/release_37/lib/Target/AMDGPU/SIISelLowering.cpp Mon Jul 20 11:41:33 2015 @@ -812,10 +812,29 @@ static SDNode *findUser(SDValue Value, u SDValue SITargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const { + SDLoc SL(Op); FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Op); unsigned FrameIndex = FINode->getIndex(); - return DAG.getTargetFrameIndex(FrameIndex, MVT::i32); + // A FrameIndex node represents a 32-bit offset into scratch memory. If + // the high bit of a frame index offset were to be set, this would mean + // that it represented an offset of ~2GB * 64 = ~128GB from the start of the + // scratch buffer, with 64 being the number of threads per wave. + // + // If we know the machine uses less than 128GB of scratch, then we can + // mark the high bit of the FrameIndex node as known zero, + // which is important, because it means in most situations we can + // prove that values derived from FrameIndex nodes are non-negative. + // This enables us to take advantage of more addressing modes when + // accessing scratch buffers, since for scratch reads/writes, the register + // offset must always be positive. + + SDValue TFI = DAG.getTargetFrameIndex(FrameIndex, MVT::i32); + if (Subtarget->enableHugeScratchBuffer()) + return TFI; + + return DAG.getNode(ISD::AssertZext, SL, MVT::i32, TFI, + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), 31))); } /// This transforms the control flow intrinsics to get the branch destination as @@ -2034,6 +2053,13 @@ void SITargetLowering::adjustWritemask(M } } +static bool isFrameIndexOp(SDValue Op) { + if (Op.getOpcode() == ISD::AssertZext) + Op = Op.getOperand(0); + + return isa<FrameIndexSDNode>(Op); +} + /// \brief Legalize target independent instructions (e.g. INSERT_SUBREG) /// with frame index operands. 
/// LLVM assumes that inputs to these instructions are registers. @@ -2042,7 +2068,7 @@ void SITargetLowering::legalizeTargetInd SmallVector<SDValue, 8> Ops; for (unsigned i = 0; i < Node->getNumOperands(); ++i) { - if (!isa<FrameIndexSDNode>(Node->getOperand(i))) { + if (!isFrameIndexOp(Node->getOperand(i))) { Ops.push_back(Node->getOperand(i)); continue; } _______________________________________________ llvm-branch-commits mailing list [email protected] http://lists.cs.uiuc.edu/mailman/listinfo/llvm-branch-commits
