[llvm-branch-commits] [llvm] 09b3f3f - [benchmark] Fixed a build error when using CMake 3.15.1 + NDK-R20
Author: AnZhong Huang Date: 2021-01-04T11:00:57+03:00 New Revision: 09b3f3f22cbe159a737c44b2e78de08bbbfa5be3 URL: https://github.com/llvm/llvm-project/commit/09b3f3f22cbe159a737c44b2e78de08bbbfa5be3 DIFF: https://github.com/llvm/llvm-project/commit/09b3f3f22cbe159a737c44b2e78de08bbbfa5be3.diff LOG: [benchmark] Fixed a build error when using CMake 3.15.1 + NDK-R20 std::decay_t, used by llvm/utils/benchmark/include/benchmark/benchmark.h, is a C++14 feature, but the CMakeLists uses C++11; that mismatch is the root cause of the build error. There are two options to fix it: 1) change the CMakeLists to use C++14, or 2) change std::decay_t to std::decay, which is what this patch does. The bug can only be reproduced with CMake 3.15; we did not observe it with CMake 3.16. But based on the code's logic, it is clearly a bug in LLVM. Reviewed By: lebedev.ri Differential Revision: https://reviews.llvm.org/D93794 Added: Modified: llvm/utils/benchmark/include/benchmark/benchmark.h Removed: diff --git a/llvm/utils/benchmark/include/benchmark/benchmark.h b/llvm/utils/benchmark/include/benchmark/benchmark.h index ab61c46e9386..3b535f1b7d52 100644 --- a/llvm/utils/benchmark/include/benchmark/benchmark.h +++ b/llvm/utils/benchmark/include/benchmark/benchmark.h @@ -990,7 +990,7 @@ inline internal::Benchmark* RegisterBenchmark(const char* name, #ifdef BENCHMARK_HAS_CXX11 template <class Lambda> internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn) { - using BenchType = internal::LambdaBenchmark<std::decay_t<Lambda>>; + using BenchType = internal::LambdaBenchmark<typename std::decay<Lambda>::type>; return internal::RegisterBenchmarkInternal( ::new BenchType(name, std::forward<Lambda>(fn))); } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
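For context, std::decay_t<T> is only a C++14 alias template for the C++11 trait std::decay<T>, so spelling out typename std::decay<T>::type keeps the header buildable under -std=c++11 without changing its meaning. A minimal standalone illustration of the equivalence (the alias names below are my own, not from the patch):

#include <type_traits>

// The C++11 spelling used by the patch; builds with -std=c++11.
template <class Lambda>
using DecayedCxx11 = typename std::decay<Lambda>::type;

#if __cplusplus >= 201402L
// The C++14 alias the header previously relied on; needs -std=c++14.
template <class Lambda>
using DecayedCxx14 = std::decay_t<Lambda>;

static_assert(std::is_same<DecayedCxx11<int&>, DecayedCxx14<int&>>::value,
              "both spellings name exactly the same type");
#endif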
[llvm-branch-commits] [llvm] 94257d1 - [RISCV] Remove unused method isUImm5NonZero() from RISCVAsmParser.cpp. NFC
Author: Craig Topper Date: 2021-01-04T00:17:39-08:00 New Revision: 94257d12cb2dc23ccdc34b6eb26b02a9cde3e668 URL: https://github.com/llvm/llvm-project/commit/94257d12cb2dc23ccdc34b6eb26b02a9cde3e668 DIFF: https://github.com/llvm/llvm-project/commit/94257d12cb2dc23ccdc34b6eb26b02a9cde3e668.diff LOG: [RISCV] Remove unused method isUImm5NonZero() from RISCVAsmParser.cpp. NFC The operand predicate that used this has been gone for a while. Added: Modified: llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp Removed: diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 4172d33384bf..d31bb8f02dab 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -514,16 +514,6 @@ struct RISCVOperand : public MCParsedAsmOperand { return IsConstantImm && isUInt<5>(Imm) && VK == RISCVMCExpr::VK_RISCV_None; } - bool isUImm5NonZero() const { -int64_t Imm; -RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; -if (!isImm()) - return false; -bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); -return IsConstantImm && isUInt<5>(Imm) && (Imm != 0) && - VK == RISCVMCExpr::VK_RISCV_None; - } - bool isSImm5() const { if (!isImm()) return false; @@ -540,7 +530,7 @@ struct RISCVOperand : public MCParsedAsmOperand { int64_t Imm; bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); return IsConstantImm && isInt<6>(Imm) && - VK == RISCVMCExpr::VK_RISCV_None; + VK == RISCVMCExpr::VK_RISCV_None; } bool isSImm6NonZero() const { ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] a650920 - [SVE] Fix inline assembly parsing crash
Author: David Sherwood Date: 2021-01-04T09:11:05Z New Revision: a65092040ad4fefcdad18382781090839cad3b67 URL: https://github.com/llvm/llvm-project/commit/a65092040ad4fefcdad18382781090839cad3b67 DIFF: https://github.com/llvm/llvm-project/commit/a65092040ad4fefcdad18382781090839cad3b67.diff LOG: [SVE] Fix inline assembly parsing crash This patch fixes a crash encountered when compiling this code: ... float16_t a; __asm__("fminv %h[a], %[b], %[c].h" : [a] "=r" (a) : [b] "Upl" (b), [c] "w" (c)) The issue here is when using the 'h' modifier for a register constraint 'r'. Differential Revision: https://reviews.llvm.org/D93537 Added: Modified: llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp llvm/test/CodeGen/AArch64/inline-asm-constraints-bad-sve.ll Removed: diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index c18e9a4e6db1..c7fa49c965a8 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -647,7 +647,8 @@ bool AArch64AsmPrinter::printAsmRegInClass(const MachineOperand &MO, const TargetRegisterInfo *RI = STI->getRegisterInfo(); Register Reg = MO.getReg(); unsigned RegToPrint = RC->getRegister(RI->getEncodingValue(Reg)); - assert(RI->regsOverlap(RegToPrint, Reg)); + if (!RI->regsOverlap(RegToPrint, Reg)) +return true; O << AArch64InstPrinter::getRegisterName(RegToPrint, AltName); return false; } diff --git a/llvm/test/CodeGen/AArch64/inline-asm-constraints-bad-sve.ll b/llvm/test/CodeGen/AArch64/inline-asm-constraints-bad-sve.ll index 5a2f4746af87..aa25d118c9b5 100644 --- a/llvm/test/CodeGen/AArch64/inline-asm-constraints-bad-sve.ll +++ b/llvm/test/CodeGen/AArch64/inline-asm-constraints-bad-sve.ll @@ -6,6 +6,7 @@ target triple = "aarch64-unknown-linux-gnu" ; CHECK: error: couldn't allocate input reg for constraint 'Upa' ; CHECK: error: couldn't allocate input reg for constraint 'r' ; CHECK: error: couldn't allocate output register for constraint 'w' +; CHECK: error: unknown token in expression define @foo1(i32 *%in) { entry: @@ -27,3 +28,11 @@ entry: %1 = call asm sideeffect "mov $0.b, $1.b \0A", "=&w,w"( %0) ret %1 } + +define half @foo4( *%inp, *%inv) { +entry: + %0 = load , * %inp, align 2 + %1 = load , * %inv, align 16 + %2 = call half asm "fminv ${0:h}, $1, $2.h", "=r,@3Upl,w"( %0, %1) + ret half %2 +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
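The log only quotes the offending statement; a complete translation unit hitting the same path might look like the sketch below (a reconstruction, not the original reproducer: it assumes an AArch64 compiler with SVE enabled, e.g. -march=armv8-a+sve, and uses the ACLE type names from arm_sve.h). The problematic combination is the 'h' template modifier applied to an operand with the general-register constraint 'r'; with this patch the compiler emits a diagnostic, as checked by the new foo4 test above, instead of tripping the removed assertion.

#include <arm_sve.h>

float16_t fminv_h(svbool_t pg, svfloat16_t v) {
  float16_t a;
  // 'h' modifier on an 'r'-constrained output used to assert in the AsmPrinter.
  __asm__("fminv %h[a], %[b], %[c].h"
          : [a] "=r"(a)
          : [b] "Upl"(pg), [c] "w"(v));
  return a;
}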
[llvm-branch-commits] [llvm] 74e7cb2 - [VE] Remove VA.needsCustom checks
Author: Kazushi (Jam) Marukawa Date: 2021-01-04T18:19:18+09:00 New Revision: 74e7cb26b9a3ff3f64883cc2418d9916b5e9257d URL: https://github.com/llvm/llvm-project/commit/74e7cb26b9a3ff3f64883cc2418d9916b5e9257d DIFF: https://github.com/llvm/llvm-project/commit/74e7cb26b9a3ff3f64883cc2418d9916b5e9257d.diff LOG: [VE] Remove VA.needsCustom checks Remove VA.needsCustom checks which are copied from Sparc implementation at the very beginning of VE implementation. Add assert to sanity-check VA.needsCustom flag, also. Reviewed By: simoll Differential Revision: https://reviews.llvm.org/D93847 Added: Modified: llvm/lib/Target/VE/VEISelLowering.cpp Removed: diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp index ea9281a00502..5a3a9f504e30 100644 --- a/llvm/lib/Target/VE/VEISelLowering.cpp +++ b/llvm/lib/Target/VE/VEISelLowering.cpp @@ -314,6 +314,7 @@ VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); +assert(!VA.needsCustom() && "Unexpected custom lowering"); SDValue OutVal = OutVals[i]; // Integer return values must be sign or zero extended by the callee. @@ -349,8 +350,6 @@ VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, llvm_unreachable("Unknown loc info!"); } -assert(!VA.needsCustom() && "Unexpected custom lowering"); - Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag); // Guarantee that all emitted copies are stuck together with flags. @@ -390,6 +389,7 @@ SDValue VETargetLowering::LowerFormalArguments( for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; +assert(!VA.needsCustom() && "Unexpected custom lowering"); if (VA.isRegLoc()) { // This argument is passed in a register. // All integer register arguments are promoted by the caller to i64. @@ -399,11 +399,6 @@ SDValue VETargetLowering::LowerFormalArguments( MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT())); SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT()); - // Get the high bits for i32 struct elements. - if (VA.getValVT() == MVT::i32 && VA.needsCustom()) -Arg = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Arg, - DAG.getConstant(32, DL, MVT::i32)); - // The caller promoted the argument, so insert an Assert?ext SDNode so we // won't promote the value again in this function. switch (VA.getLocInfo()) { @@ -734,6 +729,7 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Copy all of the result registers out of their specified physreg. for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; +assert(!VA.needsCustom() && "Unexpected custom lowering"); unsigned Reg = VA.getLocReg(); // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can @@ -751,11 +747,6 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, InGlue = Chain.getValue(2); } -// Get the high bits for i32 struct elements. -if (VA.getValVT() == MVT::i32 && VA.needsCustom()) - RV = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), RV, - DAG.getConstant(32, DL, MVT::i32)); - // The callee promoted the return value, so insert an Assert?ext SDNode so // we won't promote the value again in this function. switch (VA.getLocInfo()) { ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] e43b3d1 - Revert "[Sema] Fix deleted function problem in implicitly movable test"
Author: Yang Fan Date: 2021-01-04T17:21:19+08:00 New Revision: e43b3d1f5e05c6e5e07cff054df193cf0d0c6583 URL: https://github.com/llvm/llvm-project/commit/e43b3d1f5e05c6e5e07cff054df193cf0d0c6583 DIFF: https://github.com/llvm/llvm-project/commit/e43b3d1f5e05c6e5e07cff054df193cf0d0c6583.diff LOG: Revert "[Sema] Fix deleted function problem in implicitly movable test" This reverts commit 89b0972a Added: Modified: clang/lib/Sema/SemaInit.cpp clang/lib/Sema/SemaStmt.cpp Removed: clang/test/CXX/class/class.init/class.copy.elision/p3.cpp diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index 4a965c60c74e..6d2e6094e79c 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -4119,9 +4119,7 @@ static void TryConstructorInitialization(Sema &S, InitializationSequence::FK_ListConstructorOverloadFailed : InitializationSequence::FK_ConstructorOverloadFailed, Result); - -if (Result != OR_Deleted) - return; +return; } bool HadMultipleCandidates = (CandidateSet.size() > 1); @@ -4142,45 +4140,31 @@ static void TryConstructorInitialization(Sema &S, return; } + // C++11 [dcl.init]p6: + // If a program calls for the default initialization of an object + // of a const-qualified type T, T shall be a class type with a + // user-provided default constructor. + // C++ core issue 253 proposal: + // If the implicit default constructor initializes all subobjects, no + // initializer should be required. + // The 253 proposal is for example needed to process libstdc++ headers in 5.x. CXXConstructorDecl *CtorDecl = cast(Best->Function); - if (Result != OR_Deleted) { // TODO: Support for more than one failure. -// C++11 [dcl.init]p6: -// If a program calls for the default initialization of an object -// of a const-qualified type T, T shall be a class type with a -// user-provided default constructor. -// C++ core issue 253 proposal: -// If the implicit default constructor initializes all subobjects, no -// initializer should be required. -// The 253 proposal is for example needed to process libstdc++ headers -// in 5.x. -if (Kind.getKind() == InitializationKind::IK_Default && -Entity.getType().isConstQualified()) { - if (!CtorDecl->getParent()->allowConstDefaultInit()) { -if (!maybeRecoverWithZeroInitialization(S, Sequence, Entity)) - Sequence.SetFailed(InitializationSequence::FK_DefaultInitOfConst); -return; - } -} - -// C++11 [over.match.list]p1: -// In copy-list-initialization, if an explicit constructor is chosen, the -// initializer is ill-formed. -if (IsListInit && !Kind.AllowExplicit() && CtorDecl->isExplicit()) { - Sequence.SetFailed(InitializationSequence::FK_ExplicitConstructor); + if (Kind.getKind() == InitializationKind::IK_Default && + Entity.getType().isConstQualified()) { +if (!CtorDecl->getParent()->allowConstDefaultInit()) { + if (!maybeRecoverWithZeroInitialization(S, Sequence, Entity)) +Sequence.SetFailed(InitializationSequence::FK_DefaultInitOfConst); return; } } - // [class.copy.elision]p3: - // In some copy-initialization contexts, a two-stage overload resolution - // is performed. - // If the first overload resolution selects a deleted function, we also - // need the initialization sequence to decide whether to perform the second - // overload resolution. - // For deleted functions in other contexts, there is no need to get the - // initialization sequence. 
- if (Result == OR_Deleted && Kind.getKind() != InitializationKind::IK_Copy) + // C++11 [over.match.list]p1: + // In copy-list-initialization, if an explicit constructor is chosen, the + // initializer is ill-formed. + if (IsListInit && !Kind.AllowExplicit() && CtorDecl->isExplicit()) { +Sequence.SetFailed(InitializationSequence::FK_ExplicitConstructor); return; + } // Add the constructor initialization step. Any cv-qualification conversion is // subsumed by the initialization. @@ -5276,16 +5260,7 @@ static void TryUserDefinedConversion(Sema &S, Sequence.SetOverloadFailure( InitializationSequence::FK_UserConversionOverloadFailed, Result); - -// [class.copy.elision]p3: -// In some copy-initialization contexts, a two-stage overload resolution -// is performed. -// If the first overload resolution selects a deleted function, we also -// need the initialization sequence to decide whether to perform the second -// overload resolution. -if (!(Result == OR_Deleted && - Kind.getKind() == InitializationKind::IK_Copy)) - return; +return; } FunctionDecl *Function = Best->Function
[llvm-branch-commits] [llvm] e090555 - [ArgPromotion] Delay dead GEP removal until doPromotion.
Author: Florian Hahn Date: 2021-01-04T09:51:20Z New Revision: e0905553b424afe94938e2cc010fcecd0822eaba URL: https://github.com/llvm/llvm-project/commit/e0905553b424afe94938e2cc010fcecd0822eaba DIFF: https://github.com/llvm/llvm-project/commit/e0905553b424afe94938e2cc010fcecd0822eaba.diff LOG: [ArgPromotion] Delay dead GEP removal until doPromotion. Currently ArgPromotion removes dead GEPs as part of the legality check in isSafeToPromoteArgument. If no promotion happens, this means the pass claims no modifications happened, even though GEPs were removed. This patch fixes the issue by delaying removal of dead GEPs until doPromotion: isSafeToPromoteArgument can simply skips dead GEPs and the code in doPromotion dealing with GEPs is updated to account for dead GEPs. Once we committed to promotion, it should be safe to remove dead GEPs. Alternatively isSafeToPromoteArgument could return an additional boolean to indicate whether it made changes, but this is quite cumbersome and there should be no real benefit of weeding out some dead GEPs here if we do not perform promotion. I added a test for the case where dead GEPs need to be removed when promotion happens in 578c5a0c6e71. Fixes PR47477. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D93991 Added: llvm/test/Transforms/ArgumentPromotion/dead-gep-no-promotion.ll Modified: llvm/lib/Transforms/IPO/ArgumentPromotion.cpp Removed: diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp index 11c8d3b7a8d6..eaaee9a520ab 100644 --- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -160,13 +160,19 @@ doPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote, // In this table, we will track which indices are loaded from the argument // (where direct loads are tracked as no indices). ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; - for (User *U : I->users()) { -Instruction *UI = cast(U); + for (auto Iter = I->user_begin(), End = I->user_end(); Iter != End;) { +Instruction *UI = cast(*Iter++); Type *SrcTy; if (LoadInst *L = dyn_cast(UI)) SrcTy = L->getType(); else SrcTy = cast(UI)->getSourceElementType(); +// Skip dead GEPs and remove them. +if (isa(UI) && UI->use_empty()) { + UI->eraseFromParent(); + continue; +} + IndicesVector Indices; Indices.reserve(UI->getNumOperands() - 1); // Since loads will only have a single operand, and GEPs only a single @@ -436,6 +442,8 @@ doPromotion(Function *F, SmallPtrSetImpl &ArgsToPromote, << "' in function '" << F->getName() << "'\n"); } else { GetElementPtrInst *GEP = cast(I->user_back()); +assert(!GEP->use_empty() && + "GEPs without uses should be cleaned up already"); IndicesVector Operands; Operands.reserve(GEP->getNumIndices()); for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end(); @@ -674,11 +682,7 @@ static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR if (GEP->use_empty()) { // Dead GEP's cause trouble later. Just remove them if we run into // them. -GEP->eraseFromParent(); -// TODO: This runs the above loop over and over again for dead GEPs -// Couldn't we just do increment the UI iterator earlier and erase the -// use? 
-return isSafeToPromoteArgument(Arg, ByValTy, AAR, MaxElements); +continue; } if (!UpdateBaseTy(GEP->getSourceElementType())) diff --git a/llvm/test/Transforms/ArgumentPromotion/dead-gep-no-promotion.ll b/llvm/test/Transforms/ArgumentPromotion/dead-gep-no-promotion.ll new file mode 100644 index ..72431a43ae6f --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/dead-gep-no-promotion.ll @@ -0,0 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -argpromotion -S %s | FileCheck %s + +@glob = external global i32* + +; No arguments in @callee can be promoted, but it contains a dead GEP. Make +; sure it is not removed, as we do not perform any promotion. +define i32 @caller(i32* %ptr) { +; CHECK-LABEL: @caller( +; CHECK-NEXT:call void @callee(i32* [[PTR:%.*]], i32* [[PTR]], i32* [[PTR]]) +; CHECK-NEXT:ret i32 0 +; + call void @callee(i32* %ptr, i32* %ptr, i32* %ptr) + ret i32 0 +} + +define internal void @callee(i32* %arg, i32* %arg1, i32* %arg2) { +; CHECK-LABEL: define internal void @callee( +; CHECK-NEXT:call void @external_fn(i32* [[ARG:%.*]], i32* [[ARG1:%.*]]) +; CHECK-NEXT:[[DEAD_GEP:%.*]] = getelementptr inbounds i32, i32* [[ARG1]], i32 17 +; CHECK-NEXT:store i32* [[ARG2:%.*]], i32** @glob, a
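The rewritten loop in doPromotion relies on a common LLVM idiom: advance the use iterator past the current user before that user may be erased, so erasing it cannot invalidate the iterator. A generic sketch of the idiom (illustrative names, not from the patch; it assumes each user holds a single use of the value, as the dead GEPs here do):

#include "llvm/IR/Instructions.h"
#include "llvm/IR/Value.h"

// Erase users of V that are dead GEPs without invalidating the iterator.
static void removeDeadGEPUsers(llvm::Value *V) {
  for (auto It = V->user_begin(), End = V->user_end(); It != End;) {
    llvm::Instruction *User = llvm::cast<llvm::Instruction>(*It++); // advance first
    if (llvm::isa<llvm::GetElementPtrInst>(User) && User->use_empty())
      User->eraseFromParent(); // safe: It already points past this use
  }
}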
[llvm-branch-commits] [llvm] 685c8b5 - [AARCH64] Improve accumulator forwarding for Cortex-A57 model
Author: Usman Nadeem Date: 2021-01-04T10:58:43Z New Revision: 685c8b537af3138cff24ec6060a86140b8963a1e URL: https://github.com/llvm/llvm-project/commit/685c8b537af3138cff24ec6060a86140b8963a1e DIFF: https://github.com/llvm/llvm-project/commit/685c8b537af3138cff24ec6060a86140b8963a1e.diff LOG: [AARCH64] Improve accumulator forwarding for Cortex-A57 model The old CPU model only had MLA->MLA forwarding. I added some missing MUL->MLA read advances and a missing absolute diff accumulator read advance according to the Cortex A57 Software Optimization Guide. The patch improves performance in EEMBC rgbyiqv2 by about 6%-7% and spec2006/milc by 8% (repeated runs on multiple devices), causes no significant regressions (none in SPEC). Differential Revision: https://reviews.llvm.org/D92296 Added: llvm/test/tools/llvm-mca/AArch64/Cortex/forwarding-A57.s Modified: llvm/lib/Target/AArch64/AArch64SchedA57.td llvm/lib/Target/AArch64/AArch64SchedA57WriteRes.td Removed: diff --git a/llvm/lib/Target/AArch64/AArch64SchedA57.td b/llvm/lib/Target/AArch64/AArch64SchedA57.td index 7c40da05c305..aa5bec8088e4 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedA57.td +++ b/llvm/lib/Target/AArch64/AArch64SchedA57.td @@ -93,7 +93,7 @@ def : SchedAlias; def : SchedAlias; def : SchedAlias; def : SchedAlias; -def : SchedAlias; +def : WriteRes { let Latency = 5;} def : SchedAlias; def : SchedAlias; def : SchedAlias; @@ -350,12 +350,16 @@ def : InstRW<[A57Write_8cyc_8S, WriteAdr], (instregex "ST4Fourv(2d)_POST$") // D form - v8i8_v8i16, v4i16_v4i32, v2i32_v2i64 // Q form - v16i8_v8i16, v8i16_v4i32, v4i32_v2i64 +// Cortex A57 Software Optimization Guide Sec 3.14 +// Advance for absolute diff accum, pairwise add and accumulate, shift accumulate +def A57ReadIVA3 : SchedReadAdvance<3, [A57Write_4cyc_1X_NonMul_Forward, A57Write_5cyc_2X_NonMul_Forward]>; + // ASIMD absolute diff accum, D-form -def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>; +def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>; // ASIMD absolute diff accum, Q-form -def : InstRW<[A57Write_5cyc_2X], (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>; +def : InstRW<[A57Write_5cyc_2X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>; // ASIMD absolute diff accum long -def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]ABAL")>; +def : InstRW<[A57Write_4cyc_1X_NonMul_Forward, A57ReadIVA3], (instregex "^[SU]ABAL")>; // ASIMD arith, reduce, 4H/4S def : InstRW<[A57Write_4cyc_1X], (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>; @@ -372,32 +376,41 @@ def : InstRW<[A57Write_7cyc_1V_1X], (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")> def : InstRW<[A57Write_8cyc_2X], (instregex "^[SU](MIN|MAX)Vv16i8v$")>; // ASIMD multiply, D-form -def : InstRW<[A57Write_5cyc_1W], (instregex "^(P?MUL|SQR?DMULH)(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)(_indexed)?$")>; +// MUL +def : InstRW<[A57Write_5cyc_1W_Mul_Forward], (instregex "^MUL(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)(_indexed)?$")>; +// PMUL, SQDMULH, SQRDMULH +def : InstRW<[A57Write_5cyc_1W], (instregex "^(PMUL|SQR?DMULH)(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)(_indexed)?$")>; + // ASIMD multiply, Q-form -def : InstRW<[A57Write_6cyc_2W], (instregex "^(P?MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>; +// MUL +def : InstRW<[A57Write_6cyc_2W_Mul_Forward], (instregex "^MUL(v16i8|v8i16|v4i32)(_indexed)?$")>; +// PMUL, SQDMULH, SQRDMULH +def : InstRW<[A57Write_6cyc_2W], (instregex "^(PMUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>; + +// 
Cortex A57 Software Optimization Guide Sec 3.14 +def A57ReadIVMA4 : SchedReadAdvance<4 , [A57Write_5cyc_1W_Mul_Forward, A57Write_6cyc_2W_Mul_Forward]>; +def A57ReadIVMA3 : SchedReadAdvance<3 , [A57Write_5cyc_1W_Mul_Forward, A57Write_6cyc_2W_Mul_Forward]>; // ASIMD multiply accumulate, D-form -def : InstRW<[A57Write_5cyc_1W], (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>; +def : InstRW<[A57Write_5cyc_1W_Mul_Forward, A57ReadIVMA4], (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>; // ASIMD multiply accumulate, Q-form -def : InstRW<[A57Write_6cyc_2W], (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>; +def : InstRW<[A57Write_6cyc_2W_Mul_Forward, A57ReadIVMA4], (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>; // ASIMD multiply accumulate long // ASIMD multiply accumulate saturating long -def A57WriteIVMA : SchedWriteRes<[A57UnitW]> { let Latency = 5; } -def A57ReadIVMA4 : SchedReadAdvance<4, [A57WriteIVMA]>; -def : InstRW<[A57WriteIVMA, A57ReadIVMA4], (instregex "^(S|U|SQD)ML[AS]L")>; +def : InstRW<[A57Write_5cyc_1W_Mul_Forward, A57ReadIVMA4], (instregex "^(S|U)ML[AS]L")>; +def : InstRW<[A57Write_5cyc_1W_Mul_Forward, A57ReadIVMA3], (instregex "^SQDML[AS]L")>; // ASIMD multiply long -def : InstRW<[A57Write_5cyc_1W], (
[llvm-branch-commits] [llvm] c287f90 - [VE] Change default CPU name to "generic"
Author: Kazushi (Jam) Marukawa Date: 2021-01-04T20:09:57+09:00 New Revision: c287f90ccd33b3aa47488e8f2b3a24aa0717066b URL: https://github.com/llvm/llvm-project/commit/c287f90ccd33b3aa47488e8f2b3a24aa0717066b DIFF: https://github.com/llvm/llvm-project/commit/c287f90ccd33b3aa47488e8f2b3a24aa0717066b.diff LOG: [VE] Change default CPU name to "generic" Change default CPU name of SX-Aurora VE from "ve" to "generic" similar to other architectures. Reviewed By: simoll Differential Revision: https://reviews.llvm.org/D93836 Added: llvm/test/CodeGen/VE/Scalar/cpu.ll Modified: llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp llvm/lib/Target/VE/VE.td llvm/lib/Target/VE/VESubtarget.cpp Removed: diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp index 239a89812e47..4c480c050274 100644 --- a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp +++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp @@ -56,7 +56,7 @@ static MCRegisterInfo *createVEMCRegisterInfo(const Triple &TT) { static MCSubtargetInfo *createVEMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { if (CPU.empty()) -CPU = "ve"; +CPU = "generic"; return createVEMCSubtargetInfoImpl(TT, CPU, /*TuneCPU=*/CPU, FS); } diff --git a/llvm/lib/Target/VE/VE.td b/llvm/lib/Target/VE/VE.td index a2c0ba04adaa..9e8adcd42077 100644 --- a/llvm/lib/Target/VE/VE.td +++ b/llvm/lib/Target/VE/VE.td @@ -46,7 +46,7 @@ def VEAsmParser : AsmParser { class Proc Features> : Processor; -def : Proc<"ve", []>; +def : Proc<"generic", []>; //===--===// // Declare the target which we are implementing diff --git a/llvm/lib/Target/VE/VESubtarget.cpp b/llvm/lib/Target/VE/VESubtarget.cpp index 3406a613e89d..daa6cfb8aa84 100644 --- a/llvm/lib/Target/VE/VESubtarget.cpp +++ b/llvm/lib/Target/VE/VESubtarget.cpp @@ -33,7 +33,7 @@ VESubtarget &VESubtarget::initializeSubtargetDependencies(StringRef CPU, // Determine default and user specified characteristics std::string CPUName = std::string(CPU); if (CPUName.empty()) -CPUName = "ve"; +CPUName = "generic"; // Parse features string. ParseSubtargetFeatures(CPUName, /*TuneCPU=*/CPU, FS); diff --git a/llvm/test/CodeGen/VE/Scalar/cpu.ll b/llvm/test/CodeGen/VE/Scalar/cpu.ll new file mode 100644 index ..7586a38f9f5a --- /dev/null +++ b/llvm/test/CodeGen/VE/Scalar/cpu.ll @@ -0,0 +1,5 @@ +; RUN: llc -mtriple=ve -mcpu=help < %s 2>&1 | FileCheck %s + +; CHECK: Available CPUs for this target: +; CHECK-EMPTY: +; CHECK-NEXT: generic - Select the generic processor. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 6c89f6f - [AArch64] Attempt to fix Mac tests with a more specific triple. NFC
Author: David Green Date: 2021-01-04T11:29:18Z New Revision: 6c89f6fae4913eba07093fe7c268e828f801c78b URL: https://github.com/llvm/llvm-project/commit/6c89f6fae4913eba07093fe7c268e828f801c78b DIFF: https://github.com/llvm/llvm-project/commit/6c89f6fae4913eba07093fe7c268e828f801c78b.diff LOG: [AArch64] Attempt to fix Mac tests with a more specific triple. NFC Added: Modified: llvm/test/tools/llvm-mca/AArch64/Cortex/forwarding-A57.s Removed: diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/forwarding-A57.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/forwarding-A57.s index a71c99400c4e..f111c4101ab0 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Cortex/forwarding-A57.s +++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/forwarding-A57.s @@ -1,4 +1,4 @@ -# RUN: llvm-mca -march=aarch64 -mcpu=cortex-a57 -iterations=1 -timeline < %s | FileCheck %s +# RUN: llvm-mca -mtriple=aarch64-none-eabi -mcpu=cortex-a57 -iterations=1 -timeline < %s | FileCheck %s # CHECK: [0] Code Region # CHECK: Instructions: 2 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 975b64b - [docs] Release notes for IsDecl in DIModule.
Author: Chih-Ping Chen Date: 2021-01-04T07:03:34-05:00 New Revision: 975b64b29375cdfb3672fedee4216c6512672fbf URL: https://github.com/llvm/llvm-project/commit/975b64b29375cdfb3672fedee4216c6512672fbf DIFF: https://github.com/llvm/llvm-project/commit/975b64b29375cdfb3672fedee4216c6512672fbf.diff LOG: [docs] Release notes for IsDecl in DIModule. Please see https://reviews.llvm.org/D93462 for the actual code change. Differential Revision: https://reviews.llvm.org/D93558 Added: Modified: llvm/docs/ReleaseNotes.rst Removed: diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index b77d054079a3..48fa42f5c930 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -169,6 +169,12 @@ Changes to the Debug Info During this release ... +* The DIModule metadata is extended with a field to indicate if it is a + module declaration. This extension enables the emission of debug info + for a Fortran 'use ' statement. For more information + on what the debug info entries should look like and how the debugger + can use them, please see test/DebugInfo/X86/dimodule-external-fortran.ll. + Changes to the LLVM tools - ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 42652c1 - [Sparc] Fixes for the internal assembler
Author: LemonBoy Date: 2021-01-04T13:25:37+01:00 New Revision: 42652c1d6e21345173f5dd971cd453520aa5a7ef URL: https://github.com/llvm/llvm-project/commit/42652c1d6e21345173f5dd971cd453520aa5a7ef DIFF: https://github.com/llvm/llvm-project/commit/42652c1d6e21345173f5dd971cd453520aa5a7ef.diff LOG: [Sparc] Fixes for the internal assembler * Prevent the generation of invalid shift instructions by constraining the immediate field. I've limited the shift field to constant values only, adding the `R_SPARC_5`/`R_SPARC_6` relocations is trivial if needed (but I can't really think of a use case for those). * Fix the generation of PC-relative `call` * Fix the transformation of `jmp sym` into `jmpl` * Emit fixups for simm13 operands I moved the choice of the correct relocation into the code emitter as I've seen the other backends do, it can be definitely cleaner but the aim was to reduce the scope of the patch as much as possible. Fixes the problems raised by joerg in L254199 Reviewed By: dcederman Differential Revision: https://reviews.llvm.org/D78193 Added: Modified: llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h llvm/lib/Target/Sparc/SparcAsmPrinter.cpp llvm/lib/Target/Sparc/SparcISelLowering.cpp llvm/lib/Target/Sparc/SparcInstr64Bit.td llvm/lib/Target/Sparc/SparcInstrFormats.td llvm/lib/Target/Sparc/SparcInstrInfo.td llvm/test/MC/Sparc/sparc-asm-errors.s llvm/test/MC/Sparc/sparc-ctrl-instructions.s llvm/test/MC/Sparc/sparc-relocations.s Removed: diff --git a/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp index 16e159621672..5f1bf316e871 100644 --- a/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp +++ b/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp @@ -82,6 +82,11 @@ class SparcAsmParser : public MCTargetAsmParser { OperandMatchResultTy parseMembarTag(OperandVector &Operands); + template + OperandMatchResultTy parseShiftAmtImm(OperandVector &Operands); + + OperandMatchResultTy parseCallTarget(OperandVector &Operands); + OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Name); OperandMatchResultTy @@ -262,6 +267,36 @@ class SparcOperand : public MCParsedAsmOperand { bool isMEMri() const { return Kind == k_MemoryImm; } bool isMembarTag() const { return Kind == k_Immediate; } + bool isCallTarget() const { +if (!isImm()) + return false; + +if (const MCConstantExpr *CE = dyn_cast(Imm.Val)) + return CE->getValue() % 4 == 0; + +return true; + } + + bool isShiftAmtImm5() const { +if (!isImm()) + return false; + +if (const MCConstantExpr *CE = dyn_cast(Imm.Val)) + return isUInt<5>(CE->getValue()); + +return false; + } + + bool isShiftAmtImm6() const { +if (!isImm()) + return false; + +if (const MCConstantExpr *CE = dyn_cast(Imm.Val)) + return isUInt<6>(CE->getValue()); + +return false; + } + bool isIntReg() const { return (Kind == k_Register && Reg.Kind == rk_IntReg); } @@ -343,6 +378,15 @@ class SparcOperand : public MCParsedAsmOperand { addExpr(Inst, Expr); } + void addShiftAmtImm5Operands(MCInst &Inst, unsigned N) const { +assert(N == 1 && "Invalid number of operands!"); +addExpr(Inst, getImm()); + } + void addShiftAmtImm6Operands(MCInst &Inst, unsigned N) const { +assert(N == 1 && "Invalid number of operands!"); +addExpr(Inst, getImm()); + } + void addExpr(MCInst &Inst, const MCExpr *Expr) const{ // Add as 
immediate when possible. Null MCExpr = 0. if (!Expr) @@ -377,6 +421,11 @@ class SparcOperand : public MCParsedAsmOperand { addExpr(Inst, Expr); } + void addCallTargetOperands(MCInst &Inst, unsigned N) const { +assert(N == 1 && "Invalid number of operands!"); +addExpr(Inst, getImm()); + } + static std::unique_ptr CreateToken(StringRef Str, SMLoc S) { auto Op = std::make_unique(k_Token); Op->Tok.Data = Str.data(); @@ -645,7 +694,7 @@ OperandMatchResultTy SparcAsmParser::tryParseRegister(unsigned &RegNo, EndLoc = Tok.getEndLoc(); RegNo = 0; if (getLexer().getKind() != AsmToken::Percent) -return MatchOperand_Success; +return MatchOperand_NoMatch; Parser.Lex(); unsigned regKind = SparcOperand::rk_None; if (matchRegisterName(Tok, RegNo, regKind)) { @@ -729,37 +778,74 @@ ParseDirective(AsmToken DirectiveID) OperandMatchResultTy SparcAsmParser::parseMEMOperand(OperandVector &Operands) { SMLoc S, E; - unsigned BaseReg = 0; - if (ParseRegister(BaseReg, S, E)) { + std::unique_ptr LHS; + if (parseSparcAsmOperand(LHS) != MatchOperand_Success) re
[llvm-branch-commits] [llvm] 901cc9b - [ARM] Extend lowering for i64 reductions
Author: David Green Date: 2021-01-04T12:44:43Z New Revision: 901cc9b6f30f120f2fbdc01f9eec3708c512186b URL: https://github.com/llvm/llvm-project/commit/901cc9b6f30f120f2fbdc01f9eec3708c512186b DIFF: https://github.com/llvm/llvm-project/commit/901cc9b6f30f120f2fbdc01f9eec3708c512186b.diff LOG: [ARM] Extend lowering for i64 reductions The lowering of a <4 x i16> or <4 x i8> vecreduce.add into an i64 would previously be expanded, due to the i64 not being legal. This patch adjusts our reduction matchers, making it produce a VADDLV(sext A to v4i32) instead. Differential Revision: https://reviews.llvm.org/D93622 Added: Modified: llvm/lib/Target/ARM/ARMISelLowering.cpp llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll Removed: diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 6eb1bdffdac4..6a8355f0c3e8 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -14959,12 +14959,23 @@ static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG, // VADDLV u/s 32 // VMLALV u/s 16/32 + // If the input vector is smaller than legal (v4i8/v4i16 for example) we can + // extend it and use v4i32 instead. + auto ExtendIfNeeded = [&](SDValue A, unsigned ExtendCode) { +EVT AVT = A.getValueType(); +if (!AVT.is128BitVector()) + A = DAG.getNode(ExtendCode, dl, + AVT.changeVectorElementType(MVT::getIntegerVT( + 128 / AVT.getVectorMinNumElements())), + A); +return A; + }; auto IsVADDV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef ExtTypes) { if (ResVT != RetTy || N0->getOpcode() != ExtendCode) return SDValue(); SDValue A = N0->getOperand(0); if (llvm::any_of(ExtTypes, [&A](MVT Ty) { return A.getValueType() == Ty; })) - return A; + return ExtendIfNeeded(A, ExtendCode); return SDValue(); }; auto IsPredVADDV = [&](MVT RetTy, unsigned ExtendCode, @@ -14978,7 +14989,7 @@ static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); SDValue A = Ext->getOperand(0); if (llvm::any_of(ExtTypes, [&A](MVT Ty) { return A.getValueType() == Ty; })) - return A; + return ExtendIfNeeded(A, ExtendCode); return SDValue(); }; auto IsVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef ExtTypes, @@ -15007,8 +15018,12 @@ static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG, A = ExtA->getOperand(0); B = ExtB->getOperand(0); if (A.getValueType() == B.getValueType() && -llvm::any_of(ExtTypes, [&A](MVT Ty) { return A.getValueType() == Ty; })) +llvm::any_of(ExtTypes, + [&A](MVT Ty) { return A.getValueType() == Ty; })) { + A = ExtendIfNeeded(A, ExtendCode); + B = ExtendIfNeeded(B, ExtendCode); return true; +} return false; }; auto IsPredVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef ExtTypes, @@ -15037,8 +15052,12 @@ static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG, A = ExtA->getOperand(0); B = ExtB->getOperand(0); if (A.getValueType() == B.getValueType() && -llvm::any_of(ExtTypes, [&A](MVT Ty) { return A.getValueType() == Ty; })) +llvm::any_of(ExtTypes, + [&A](MVT Ty) { return A.getValueType() == Ty; })) { + A = ExtendIfNeeded(A, ExtendCode); + B = ExtendIfNeeded(B, ExtendCode); return true; +} return false; }; auto Create64bitNode = [&](unsigned Opcode, ArrayRef Ops) { @@ -15051,9 +15070,11 @@ static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ARMISD::VADDVs, dl, ResVT, A); if 
(SDValue A = IsVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8})) return DAG.getNode(ARMISD::VADDVu, dl, ResVT, A); - if (SDValue A = IsVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32})) + if (SDValue A = IsVADDV(MVT::i64, ISD::SIGN_EXTEND, + {MVT::v4i8, MVT::v4i16, MVT::v4i32})) return Create64bitNode(ARMISD::VADDLVs, {A}); - if (SDValue A = IsVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32})) + if (SDValue A = IsVADDV(MVT::i64, ISD::ZERO_EXTEND, + {MVT::v4i8, MVT::v4i16, MVT::v4i32})) return Create64bitNode(ARMISD::VADDLVu, {A}); if (SDValue A = IsVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8})) return DAG.getNode(ISD::TRUNCATE, dl, ResVT, @@ -15067,9 +15088,11 @@ static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ARMISD::VADDVps, dl, ResVT, A, Mask); if (SDValue A = IsPredVADDV(MVT::i32, ISD::ZERO_EXTEND,
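The essential computation in the new ExtendIfNeeded lambda is picking an element width so that the extended vector is exactly 128 bits wide (one MVE q-register), i.e. 128 / AVT.getVectorMinNumElements(). A trivial standalone check of that arithmetic (illustrative only, not LLVM code):

// Element width chosen for the extended vector, in bits.
constexpr unsigned extendedElemBits(unsigned NumElts) { return 128 / NumElts; }

// v4i8 and v4i16 are both widened to v4i32, which VADDLV/VMLALV can reduce to i64.
static_assert(extendedElemBits(4) == 32, "4-element vectors widen to i32 lanes");
static_assert(extendedElemBits(8) == 16, "8-element vectors widen to i16 lanes");
static_assert(extendedElemBits(16) == 8, "16-element vectors widen to i8 lanes");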
[llvm-branch-commits] [llvm] 23b4198 - [Support] Add KnownBits::icmp helpers.
Author: Simon Pilgrim Date: 2021-01-04T12:46:27Z New Revision: 23b41986527a3fc5615480a8f7a0b0debd5fcef4 URL: https://github.com/llvm/llvm-project/commit/23b41986527a3fc5615480a8f7a0b0debd5fcef4 DIFF: https://github.com/llvm/llvm-project/commit/23b41986527a3fc5615480a8f7a0b0debd5fcef4.diff LOG: [Support] Add KnownBits::icmp helpers. Check if all possible values for a pair of knownbits give the same icmp result - these are based off the checks performed in InstCombineCompares.cpp and D86578. Add exhaustive unit test coverage - a followup will update InstCombineCompares.cpp to use this. Added: Modified: llvm/include/llvm/Support/KnownBits.h llvm/lib/Support/KnownBits.cpp llvm/unittests/Support/KnownBitsTest.cpp Removed: diff --git a/llvm/include/llvm/Support/KnownBits.h b/llvm/include/llvm/Support/KnownBits.h index ec88b9807174..edb771d659e2 100644 --- a/llvm/include/llvm/Support/KnownBits.h +++ b/llvm/include/llvm/Support/KnownBits.h @@ -15,6 +15,7 @@ #define LLVM_SUPPORT_KNOWNBITS_H #include "llvm/ADT/APInt.h" +#include "llvm/ADT/Optional.h" namespace llvm { @@ -328,6 +329,36 @@ struct KnownBits { /// NOTE: RHS (shift amount) bitwidth doesn't need to be the same as LHS. static KnownBits ashr(const KnownBits &LHS, const KnownBits &RHS); + /// Determine if these known bits always give the same ICMP_EQ result. + static Optional eq(const KnownBits &LHS, const KnownBits &RHS); + + /// Determine if these known bits always give the same ICMP_NE result. + static Optional ne(const KnownBits &LHS, const KnownBits &RHS); + + /// Determine if these known bits always give the same ICMP_UGT result. + static Optional ugt(const KnownBits &LHS, const KnownBits &RHS); + + /// Determine if these known bits always give the same ICMP_UGE result. + static Optional uge(const KnownBits &LHS, const KnownBits &RHS); + + /// Determine if these known bits always give the same ICMP_ULT result. + static Optional ult(const KnownBits &LHS, const KnownBits &RHS); + + /// Determine if these known bits always give the same ICMP_ULE result. + static Optional ule(const KnownBits &LHS, const KnownBits &RHS); + + /// Determine if these known bits always give the same ICMP_SGT result. + static Optional sgt(const KnownBits &LHS, const KnownBits &RHS); + + /// Determine if these known bits always give the same ICMP_SGE result. + static Optional sge(const KnownBits &LHS, const KnownBits &RHS); + + /// Determine if these known bits always give the same ICMP_SLT result. + static Optional slt(const KnownBits &LHS, const KnownBits &RHS); + + /// Determine if these known bits always give the same ICMP_SLE result. + static Optional sle(const KnownBits &LHS, const KnownBits &RHS); + /// Insert the bits from a smaller known bits starting at bitPosition. 
void insertBits(const KnownBits &SubBits, unsigned BitPosition) { Zero.insertBits(SubBits.Zero, BitPosition); diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp index 2c25b7d9bac5..0147d21d153a 100644 --- a/llvm/lib/Support/KnownBits.cpp +++ b/llvm/lib/Support/KnownBits.cpp @@ -268,6 +268,75 @@ KnownBits KnownBits::ashr(const KnownBits &LHS, const KnownBits &RHS) { return Known; } +Optional KnownBits::eq(const KnownBits &LHS, const KnownBits &RHS) { + if (LHS.isConstant() && RHS.isConstant()) +return Optional(LHS.getConstant() == RHS.getConstant()); + if (LHS.getMaxValue().ult(RHS.getMinValue()) || + LHS.getMinValue().ugt(RHS.getMaxValue())) +return Optional(false); + if (LHS.One.intersects(RHS.Zero) || RHS.One.intersects(LHS.Zero)) +return Optional(false); + return None; +} + +Optional KnownBits::ne(const KnownBits &LHS, const KnownBits &RHS) { + if (Optional KnownEQ = eq(LHS, RHS)) +return Optional(!KnownEQ.getValue()); + return None; +} + +Optional KnownBits::ugt(const KnownBits &LHS, const KnownBits &RHS) { + if (LHS.isConstant() && RHS.isConstant()) +return Optional(LHS.getConstant().ugt(RHS.getConstant())); + // LHS >u RHS -> false if umax(LHS) <= umax(RHS) + if (LHS.getMaxValue().ule(RHS.getMinValue())) +return Optional(false); + // LHS >u RHS -> true if umin(LHS) > umax(RHS) + if (LHS.getMinValue().ugt(RHS.getMaxValue())) +return Optional(true); + return None; +} + +Optional KnownBits::uge(const KnownBits &LHS, const KnownBits &RHS) { + if (Optional IsUGT = ugt(RHS, LHS)) +return Optional(!IsUGT.getValue()); + return None; +} + +Optional KnownBits::ult(const KnownBits &LHS, const KnownBits &RHS) { + return ugt(RHS, LHS); +} + +Optional KnownBits::ule(const KnownBits &LHS, const KnownBits &RHS) { + return uge(RHS, LHS); +} + +Optional KnownBits::sgt(const KnownBits &LHS, const KnownBits &RHS) { + if (LHS.isConstant() && RHS.isConstant()) +return Optional(LHS.getConstant().sgt(RHS.getConstant())); + // LHS >s RHS -> false if smax(LHS) <= smax
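A short usage sketch of the new helpers (the values below are purely illustrative; each helper returns an llvm::Optional<bool>, where None means the known bits alone do not decide the comparison):

#include "llvm/Support/KnownBits.h"

using namespace llvm;

static void knownBitsCompareExample() {
  // LHS: a 4-bit value with bit 3 known one and bit 0 known zero,
  // i.e. one of {8, 10, 12, 14}.  RHS: the constant 7.
  KnownBits LHS(4), RHS(4);
  LHS.One.setBit(3);
  LHS.Zero.setBit(0);
  RHS.One = APInt(4, 7);
  RHS.Zero = ~RHS.One;

  Optional<bool> UGT = KnownBits::ugt(LHS, RHS); // true:  umin(LHS) = 8 > umax(RHS) = 7
  Optional<bool> EQ  = KnownBits::eq(LHS, RHS);  // false: the value ranges cannot meet
  Optional<bool> SGT = KnownBits::sgt(LHS, RHS); // false: LHS is negative as 4-bit signed
  Optional<bool> ULT = KnownBits::ult(LHS, LHS); // None:  not decided by known bits alone
  (void)UGT; (void)EQ; (void)SGT; (void)ULT;
}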
[llvm-branch-commits] [llvm] d38a025 - [AArch64] Add patterns for FMCLA*_indexed.
Author: Florian Hahn Date: 2021-01-04T13:45:51Z New Revision: d38a0258a5f4c28fd0b0c00705c40e06976ed247 URL: https://github.com/llvm/llvm-project/commit/d38a0258a5f4c28fd0b0c00705c40e06976ed247 DIFF: https://github.com/llvm/llvm-project/commit/d38a0258a5f4c28fd0b0c00705c40e06976ed247.diff LOG: [AArch64] Add patterns for FMCLA*_indexed. This patch adds patterns for the indexed variants of FCMLA. Mostly based on a patch by Tim Northover. Reviewed By: SjoerdMeijer Differential Revision: https://reviews.llvm.org/D92947 Added: Modified: llvm/lib/Target/AArch64/AArch64InstrInfo.td llvm/test/CodeGen/AArch64/neon-vcmla.ll Removed: diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index c1d8fd1aba3d..4d70fb334828 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -951,6 +951,7 @@ let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in { def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot270 (v8f16 V128:$Rn), (v8f16 V128:$Rm))), (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 1))>; } + let Predicates = [HasComplxNum, HasNEON] in { def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot90 (v2f32 V64:$Rn), (v2f32 V64:$Rm))), (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 0))>; @@ -975,14 +976,34 @@ multiclass FCMLA_PATS { (!cast("FCMLA" # ty) $Rd, $Rn, $Rm, 3)>; } +multiclass FCMLA_LANE_PATS { + def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), +(!cast("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 0)>; + def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), +(!cast("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 1)>; + def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), +(!cast("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 2)>; + def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)), +(!cast("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 3)>; +} + + let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in { defm : FCMLA_PATS; defm : FCMLA_PATS; + + defm : FCMLA_LANE_PATS; + defm : FCMLA_LANE_PATS; } let Predicates = [HasComplxNum, HasNEON] in { defm : FCMLA_PATS; defm : FCMLA_PATS; defm : FCMLA_PATS; + + defm : FCMLA_LANE_PATS; } // v8.3a Pointer Authentication diff --git a/llvm/test/CodeGen/AArch64/neon-vcmla.ll b/llvm/test/CodeGen/AArch64/neon-vcmla.ll index 11e2b869abf0..700e17e4b647 100644 --- a/llvm/test/CodeGen/AArch64/neon-vcmla.ll +++ b/llvm/test/CodeGen/AArch64/neon-vcmla.ll @@ -9,6 +9,17 @@ entry: ret <4 x half> %res } +define <4 x half> @test_16x4_lane_1(<4 x half> %a, <4 x half> %b, <4 x half> %c) { +entry: +; CHECK-LABEL: test_16x4_lane_1 +; CHECK: fcmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[1], #0 +; + %c.cast = bitcast <4 x half> %c to <2 x i32> + %c.dup = shufflevector <2 x i32> %c.cast , <2 x i32> undef, <2 x i32> + %c.res = bitcast <2 x i32> %c.dup to <4 x half> + %res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c.res) + ret <4 x half> %res +} define <4 x half> @test_rot90_16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) { entry: @@ -19,6 +30,18 @@ entry: ret <4 x half> %res } +define <4 x half> @test_rot90_16x4_lane_0(<4 x half> %a, <4 x half> %b, <4 x half> %c) { +entry: +; CHECK-LABEL: test_rot90_16x4_lane_0 +; CHECK: fcmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[0], #90 +; + %c.cast = bitcast <4 x half> %c to <2 x i32> + %c.dup = 
shufflevector <2 x i32> %c.cast , <2 x i32> undef, <2 x i32> + %c.res = bitcast <2 x i32> %c.dup to <4 x half> + %res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c.res) + ret <4 x half> %res +} + define <4 x half> @test_rot180_16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) { entry: ; CHECK-LABEL: test_rot180_16x4 @@ -28,6 +51,18 @@ entry: ret <4 x half> %res } +define <4 x half> @test_rot180_16x4_lane_0(<4 x half> %a, <4 x half> %b, <8 x half> %c) { +entry: +; CHECK-LABEL: test_rot180_16x4_lane_0 +; CHECK: fcmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[0], #180 + + %c.cast = bitcast <8 x half> %c to <4 x i32> + %c.dup = shufflevector <4 x i32> %c.cast , <4 x i32> undef, <2 x i32> + %c.res = bitcast <2 x i32> %c.dup to <4 x half> + %res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c.res) + ret <4 x half> %res +} + define <4 x half> @test_rot270_16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) { entry: ; CHECK-LABEL: test_rot270_16x4 @@ -82,6 +117,18 @@ entry: ret <8 x half> %res } +define <8 x
[llvm-branch-commits] [llvm] 060cfd9 - [AArch64][SVE]Add cost model for masked gather and scatter for scalable vector.
Author: Caroline Concatto Date: 2021-01-04T13:59:58Z New Revision: 060cfd97954835c3be18e47c631d3efb3e374439 URL: https://github.com/llvm/llvm-project/commit/060cfd97954835c3be18e47c631d3efb3e374439 DIFF: https://github.com/llvm/llvm-project/commit/060cfd97954835c3be18e47c631d3efb3e374439.diff LOG: [AArch64][SVE]Add cost model for masked gather and scatter for scalable vector. A new TTI interface has been added 'Optional getMaxVScale' that returns the maximum vscale for a given target. When known getMaxVScale is used to compute the cost of masked gather scatter for scalable vector. Depends on D92094 Differential Revision: https://reviews.llvm.org/D93030 Added: llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-gather.ll llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-scatter.ll Modified: llvm/include/llvm/Analysis/TargetTransformInfo.h llvm/include/llvm/Analysis/TargetTransformInfoImpl.h llvm/include/llvm/CodeGen/BasicTTIImpl.h llvm/lib/Analysis/TargetTransformInfo.cpp llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h Removed: diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 0953a3b3f451..d9d04429b181 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -928,6 +928,10 @@ class TargetTransformInfo { /// \return The width of the smallest vector register type. unsigned getMinVectorRegisterBitWidth() const; + /// \return The maximum value of vscale if the target specifies an + /// architectural maximum vector length, and None otherwise. + Optional getMaxVScale() const; + /// \return True if the vectorization factor should be chosen to /// make the vector of the smallest element type match the size of a /// vector register. 
For wider element types, this could result in @@ -1504,6 +1508,7 @@ class TargetTransformInfo::Concept { virtual const char *getRegisterClassName(unsigned ClassID) const = 0; virtual unsigned getRegisterBitWidth(bool Vector) const = 0; virtual unsigned getMinVectorRegisterBitWidth() = 0; + virtual Optional getMaxVScale() const = 0; virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0; virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0; virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0; @@ -1921,6 +1926,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { unsigned getMinVectorRegisterBitWidth() override { return Impl.getMinVectorRegisterBitWidth(); } + Optional getMaxVScale() const override { +return Impl.getMaxVScale(); + } bool shouldMaximizeVectorBandwidth(bool OptSize) const override { return Impl.shouldMaximizeVectorBandwidth(OptSize); } diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 620bfb885b54..ef0653d0d9f4 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -369,6 +369,8 @@ class TargetTransformInfoImplBase { unsigned getMinVectorRegisterBitWidth() const { return 128; } + Optional getMaxVScale() const { return None; } + bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; } unsigned getMinimumVF(unsigned ElemWidth) const { return 0; } diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 02f1b73226fc..9776c20400d6 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -571,6 +571,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { unsigned getRegisterBitWidth(bool Vector) const { return 32; } + Optional getMaxVScale() const { return None; } + /// Estimate the overhead of scalarizing an instruction. Insert and Extract /// are set if the demanded result elements need to be inserted and/or /// extracted from vectors. @@ -1239,8 +1241,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return thisT()->getMemcpyCost(ICA.getInst()); case Intrinsic::masked_scatter: { - if (isa(RetTy)) -return BaseT::getIntrinsicInstrCost(ICA, CostKind); assert(VF.isScalar() && "Can't vectorize types here."); const Value *Mask = Args[3]; bool VarMask = !isa(Mask); @@ -1250,8 +1250,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { VarMask, Alignment, CostKind, I); } case Intrinsic::masked_gather: { - if (isa(RetTy)) -return BaseT::getIntrinsicInstrCost(ICA, CostKind); assert(VF.isScalar() && "Can't v
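A hedged sketch of how a cost model can use the new hook (the helper below is an illustration, not the actual AArch64 implementation): when getMaxVScale() reports a value, the lane count of a scalable vector can be bounded and a per-lane gather/scatter cost multiplied out; when it returns None, the caller has to stay conservative.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"

// Illustrative only: bound the lane count of a (possibly scalable) vector.
static unsigned boundedLaneCount(const llvm::TargetTransformInfo &TTI,
                                 llvm::VectorType *VTy, unsigned FallbackLanes) {
  llvm::ElementCount EC = VTy->getElementCount();
  if (!EC.isScalable())
    return EC.getKnownMinValue();
  if (llvm::Optional<unsigned> MaxVScale = TTI.getMaxVScale())
    return EC.getKnownMinValue() * *MaxVScale; // e.g. vscale x 4 lanes, max vscale 16 for SVE
  return FallbackLanes; // unknown vscale: keep the conservative cost
}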
[llvm-branch-commits] [llvm] 4d7cb6d - [Sparc] SparcMCExpr::printVariantKind - fix Wcovered-switch-default gcc warning. NFCI.
Author: Simon Pilgrim Date: 2021-01-04T14:08:44Z New Revision: 4d7cb6da9fcf980a8ddaa09ffa2dcab1525a66db URL: https://github.com/llvm/llvm-project/commit/4d7cb6da9fcf980a8ddaa09ffa2dcab1525a66db DIFF: https://github.com/llvm/llvm-project/commit/4d7cb6da9fcf980a8ddaa09ffa2dcab1525a66db.diff LOG: [Sparc] SparcMCExpr::printVariantKind - fix Wcovered-switch-default gcc warning. NFCI. Added: Modified: llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp Removed: diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp index 2f28a06f1573..b84ecf074455 100644 --- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp +++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp @@ -41,49 +41,46 @@ void SparcMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { bool SparcMCExpr::printVariantKind(raw_ostream &OS, VariantKind Kind) { - bool closeParen = true; switch (Kind) { - default: -llvm_unreachable("Unhandled SparcMCExpr::VariantKind"); - case VK_Sparc_None: closeParen = false; break; - case VK_Sparc_LO: OS << "%lo("; break; - case VK_Sparc_HI: OS << "%hi("; break; - case VK_Sparc_H44: OS << "%h44("; break; - case VK_Sparc_M44: OS << "%m44("; break; - case VK_Sparc_L44: OS << "%l44("; break; - case VK_Sparc_HH: OS << "%hh("; break; - case VK_Sparc_HM: OS << "%hm("; break; + case VK_Sparc_None: return false; + case VK_Sparc_LO: OS << "%lo("; return true; + case VK_Sparc_HI: OS << "%hi("; return true; + case VK_Sparc_H44: OS << "%h44("; return true; + case VK_Sparc_M44: OS << "%m44("; return true; + case VK_Sparc_L44: OS << "%l44("; return true; + case VK_Sparc_HH: OS << "%hh("; return true; + case VK_Sparc_HM: OS << "%hm("; return true; // FIXME: use %pc22/%pc10, if system assembler supports them. - case VK_Sparc_PC22: OS << "%hi("; break; - case VK_Sparc_PC10: OS << "%lo("; break; + case VK_Sparc_PC22: OS << "%hi("; return true; + case VK_Sparc_PC10: OS << "%lo("; return true; // FIXME: use %got22/%got10, if system assembler supports them. 
- case VK_Sparc_GOT22:OS << "%hi("; break; - case VK_Sparc_GOT10:OS << "%lo("; break; - case VK_Sparc_GOT13:closeParen = false; break; - case VK_Sparc_13: closeParen = false; break; - case VK_Sparc_WDISP30: closeParen = false; break; - case VK_Sparc_WPLT30: closeParen = false; break; - case VK_Sparc_R_DISP32: OS << "%r_disp32("; break; - case VK_Sparc_TLS_GD_HI22: OS << "%tgd_hi22("; break; - case VK_Sparc_TLS_GD_LO10: OS << "%tgd_lo10("; break; - case VK_Sparc_TLS_GD_ADD:OS << "%tgd_add(";break; - case VK_Sparc_TLS_GD_CALL: OS << "%tgd_call("; break; - case VK_Sparc_TLS_LDM_HI22: OS << "%tldm_hi22("; break; - case VK_Sparc_TLS_LDM_LO10: OS << "%tldm_lo10("; break; - case VK_Sparc_TLS_LDM_ADD: OS << "%tldm_add("; break; - case VK_Sparc_TLS_LDM_CALL: OS << "%tldm_call("; break; - case VK_Sparc_TLS_LDO_HIX22: OS << "%tldo_hix22("; break; - case VK_Sparc_TLS_LDO_LOX10: OS << "%tldo_lox10("; break; - case VK_Sparc_TLS_LDO_ADD: OS << "%tldo_add("; break; - case VK_Sparc_TLS_IE_HI22: OS << "%tie_hi22("; break; - case VK_Sparc_TLS_IE_LO10: OS << "%tie_lo10("; break; - case VK_Sparc_TLS_IE_LD: OS << "%tie_ld("; break; - case VK_Sparc_TLS_IE_LDX:OS << "%tie_ldx(";break; - case VK_Sparc_TLS_IE_ADD:OS << "%tie_add(";break; - case VK_Sparc_TLS_LE_HIX22: OS << "%tle_hix22("; break; - case VK_Sparc_TLS_LE_LOX10: OS << "%tle_lox10("; break; + case VK_Sparc_GOT22:OS << "%hi("; return true; + case VK_Sparc_GOT10:OS << "%lo("; return true; + case VK_Sparc_GOT13:return false; + case VK_Sparc_13: return false; + case VK_Sparc_WDISP30: return false; + case VK_Sparc_WPLT30: return false; + case VK_Sparc_R_DISP32: OS << "%r_disp32("; return true; + case VK_Sparc_TLS_GD_HI22: OS << "%tgd_hi22("; return true; + case VK_Sparc_TLS_GD_LO10: OS << "%tgd_lo10("; return true; + case VK_Sparc_TLS_GD_ADD:OS << "%tgd_add(";return true; + case VK_Sparc_TLS_GD_CALL: OS << "%tgd_call("; return true; + case VK_Sparc_TLS_LDM_HI22: OS << "%tldm_hi22("; return true; + case VK_Sparc_TLS_LDM_LO10: OS << "%tldm_lo10("; return true; + case VK_Sparc_TLS_LDM_ADD: OS << "%tldm_add("; return true; + case VK_Sparc_TLS_LDM_CALL: OS << "%tldm_call("; return true; + case VK_Sparc_TLS_LDO_HIX22: OS << "%tldo_hix22("; return true; + case VK_Sparc_TLS_LDO_LOX10: OS << "%tldo_lox10("; return true; + case VK_Sparc_TLS_LDO_ADD: OS << "%tldo_add("; return true; + case VK_Sparc_TLS_IE_HI22: OS << "%tie_hi22("; return true; + case VK_Sparc_TLS_IE_LO10: OS << "%tie_lo10("; return true; + case VK_Sparc_TLS_IE_LD: OS <<
[llvm-branch-commits] [openmp] 82a29a6 - [OpenMP] Add definition/interface for target memory routines
Author: Hansang Bae Date: 2021-01-04T08:12:57-06:00 New Revision: 82a29a62aba52d68d37309cd3025370ba98e37e4 URL: https://github.com/llvm/llvm-project/commit/82a29a62aba52d68d37309cd3025370ba98e37e4 DIFF: https://github.com/llvm/llvm-project/commit/82a29a62aba52d68d37309cd3025370ba98e37e4.diff LOG: [OpenMP] Add definition/interface for target memory routines The change includes new routines introduced in 5.1 and Fortran interface. Differential Revision: https://reviews.llvm.org/D93505 Added: Modified: openmp/runtime/src/include/omp.h.var openmp/runtime/src/include/omp_lib.f90.var openmp/runtime/src/include/omp_lib.h.var Removed: diff --git a/openmp/runtime/src/include/omp.h.var b/openmp/runtime/src/include/omp.h.var index b687ff16eaeb..8821377e29d8 100644 --- a/openmp/runtime/src/include/omp.h.var +++ b/openmp/runtime/src/include/omp.h.var @@ -213,6 +213,24 @@ */ extern const char * __KAI_KMPC_CONVENTION omp_get_interop_rc_desc(const omp_interop_rc_t, omp_interop_rc_t); +/* OpenMP 5.1 device memory routines */ + +/*! + * The `omp_target_memcpy_async` routine asynchronously performs a copy between any combination of host and device pointers. + */ +extern int__KAI_KMPC_CONVENTION omp_target_memcpy_async(void *, const void *, size_t, size_t, size_t, int, + int, int, omp_depend_t *); +/*! + * The `omp_target_memcpy_rect_async` routine asynchronously performs a copy between any combination of host and device pointers. + */ +extern int__KAI_KMPC_CONVENTION omp_target_memcpy_rect_async(void *, const void *, size_t, int, const size_t *, + const size_t *, const size_t *, const size_t *, const size_t *, int, int, + int, omp_depend_t *); +/*! + * The `omp_get_mapped_ptr` routine returns the device pointer that is associated with a host pointer for a given device. 
+ */ +extern void * __KAI_KMPC_CONVENTION omp_get_mapped_ptr(const void *, int); + /* kmp API functions */ extern int__KAI_KMPC_CONVENTION kmp_get_stacksize (void); extern void __KAI_KMPC_CONVENTION kmp_set_stacksize (int); diff --git a/openmp/runtime/src/include/omp_lib.f90.var b/openmp/runtime/src/include/omp_lib.f90.var index fbbb7b9df94d..1bde868a505b 100644 --- a/openmp/runtime/src/include/omp_lib.f90.var +++ b/openmp/runtime/src/include/omp_lib.f90.var @@ -509,6 +509,103 @@ end subroutine omp_display_env + function omp_target_alloc(size, device_num) bind(c) +use omp_lib_kinds +type(c_ptr) omp_target_alloc +integer (kind=kmp_size_t_kind), value :: size +integer (kind=omp_integer_kind), value :: device_num + end function omp_target_alloc + + subroutine omp_target_free(device_ptr, device_num) bind(c) +use omp_lib_kinds +type(c_ptr), value :: device_ptr +integer (kind=omp_integer_kind), value :: device_num + end subroutine omp_target_free + + function omp_target_is_present(ptr, device_num) bind(c) +use omp_lib_kinds +integer (kind=omp_integer_kind) omp_target_is_present +type(c_ptr), value :: ptr +integer (kind=omp_integer_kind), value :: device_num + end function omp_target_is_present + + function omp_target_memcpy(dst, src, length, dst_offset, src_offset, & + dst_device_num, src_device_num) bind(c) +use omp_lib_kinds +integer (kind=omp_integer_kind) omp_target_memcpy +type(c_ptr), value :: dst, src +integer (kind=kmp_size_t_kind), value :: length, dst_offset, & +src_offset +integer (kind=omp_integer_kind), value :: dst_device_num, & +src_device_num + end function omp_target_memcpy + + function omp_target_memcpy_rect(dst, src, element_size, num_dims, & + volume, dst_offsets, src_offsets, dst_dimensions, & + src_dimensions, dst_device_num, src_device_num) bind(c) +use omp_lib_kinds +integer (kind=omp_integer_kind) omp_target_memcpy_rect +type(c_ptr), value :: dst, src +integer (kind=kmp_size_t_kind), value :: element_size +integer (kind=omp_integer_kind), value :: num_dims, & +dst_device_num, src_device_num +integer (kind=kmp_size_t_kind), intent(in) :: volume(*), & +dst_offsets(*), src_offsets(*), dst_dimensions(*), & +src_dimensions(*) + end function omp_target_memcpy_rect + + function omp_target_memcpy_async(dst, src,
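A minimal host-side sketch of calling the new asynchronous routine, following the parameter order of the declaration above (dst, src, length, dst offset, src offset, dst device, src device, dependence count, dependence list). The device numbers, buffer size, and trailing taskwait are illustrative assumptions, not taken from the patch:

#include <omp.h>
#include <vector>

int main() {
  const size_t N = 1024;
  std::vector<int> host(N, 7);
  int dev = omp_get_default_device();

  // Allocate on the device and start an asynchronous host->device copy
  // with an empty dependence list (count 0, list nullptr).
  void *devbuf = omp_target_alloc(N * sizeof(int), dev);
  int rc = omp_target_memcpy_async(devbuf, host.data(), N * sizeof(int),
                                   /*dst_offset=*/0, /*src_offset=*/0,
                                   /*dst_device=*/dev,
                                   /*src_device=*/omp_get_initial_device(),
                                   /*depobj_count=*/0, /*depobj_list=*/nullptr);
  #pragma omp taskwait   // the copy completes asynchronously; wait before reuse
  omp_target_free(devbuf, dev);
  return rc;
}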
[llvm-branch-commits] [clang] 9f8c0d1 - DeclCXX - Fix getAs<> null-dereference static analyzer warnings. NFCI.
Author: Simon Pilgrim Date: 2021-01-04T15:12:55Z New Revision: 9f8c0d15c7f706a124ba29e8f40dc1937cd5bd49 URL: https://github.com/llvm/llvm-project/commit/9f8c0d15c7f706a124ba29e8f40dc1937cd5bd49 DIFF: https://github.com/llvm/llvm-project/commit/9f8c0d15c7f706a124ba29e8f40dc1937cd5bd49.diff LOG: DeclCXX - Fix getAs<> null-dereference static analyzer warnings. NFCI. getAs<> can return null if the cast is invalid, which can lead to null pointer deferences. Use castAs<> instead which will assert that the cast is valid. Added: Modified: clang/lib/AST/DeclCXX.cpp Removed: diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp index 16eb8206dba2..b806adf36bfb 100644 --- a/clang/lib/AST/DeclCXX.cpp +++ b/clang/lib/AST/DeclCXX.cpp @@ -1508,7 +1508,7 @@ CXXMethodDecl *CXXRecordDecl::getLambdaCallOperator() const { CXXMethodDecl* CXXRecordDecl::getLambdaStaticInvoker() const { CXXMethodDecl *CallOp = getLambdaCallOperator(); - CallingConv CC = CallOp->getType()->getAs()->getCallConv(); + CallingConv CC = CallOp->getType()->castAs()->getCallConv(); return getLambdaStaticInvoker(CC); } @@ -1532,8 +1532,8 @@ CXXMethodDecl *CXXRecordDecl::getLambdaStaticInvoker(CallingConv CC) const { DeclContext::lookup_result Invoker = getLambdaStaticInvokers(*this); for (NamedDecl *ND : Invoker) { -const FunctionType *FTy = -cast(ND->getAsFunction())->getType()->getAs(); +const auto *FTy = + cast(ND->getAsFunction())->getType()->castAs(); if (FTy->getCallConv() == CC) return getInvokerAsMethod(ND); } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
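The distinction the patch relies on: getAs<> returns null when the type is not of the requested class, while castAs<> asserts that the cast succeeds, so the analyzer no longer sees a path that dereferences null. A sketch of the two idioms, assuming clang's AST headers and a CXXMethodDecl *CallOp in scope; the template argument, elided by the list formatting in the diff above, is assumed to be FunctionProtoType:

// When the cast may legitimately fail, test the result of getAs<>:
clang::CallingConv CC1 = clang::CC_C;
if (const auto *FPT = CallOp->getType()->getAs<clang::FunctionProtoType>())
  CC1 = FPT->getCallConv();

// When the type is known to be a function prototype (as for a lambda's call
// operator), castAs<> asserts instead of returning null:
clang::CallingConv CC2 =
    CallOp->getType()->castAs<clang::FunctionProtoType>()->getCallConv();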
[llvm-branch-commits] [llvm] e9f401d - [IR] CallBase::getBundleOpInfoForOperand - ensure Current iterator is defined. NFCI.
Author: Simon Pilgrim Date: 2021-01-04T15:30:15Z New Revision: e9f401d8a261e747f5dfc9e297f12ab26e56893d URL: https://github.com/llvm/llvm-project/commit/e9f401d8a261e747f5dfc9e297f12ab26e56893d DIFF: https://github.com/llvm/llvm-project/commit/e9f401d8a261e747f5dfc9e297f12ab26e56893d.diff LOG: [IR] CallBase::getBundleOpInfoForOperand - ensure Current iterator is defined. NFCI. Fix clang static analyzer undefined pointer warning in the case Begin == End. Added: Modified: llvm/lib/IR/Instructions.cpp Removed: diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index 47bf3966bc27..d6b4a4f5030f 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -400,7 +400,7 @@ CallBase::BundleOpInfo &CallBase::getBundleOpInfoForOperand(unsigned OpIdx) { bundle_op_iterator Begin = bundle_op_info_begin(); bundle_op_iterator End = bundle_op_info_end(); - bundle_op_iterator Current; + bundle_op_iterator Current = Begin; while (Begin != End) { unsigned ScaledOperandPerBundle = ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
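The analyzer's complaint is that when Begin == End the loop body never executes, so Current could be read while still undefined; initializing it to Begin gives it a value on every path without changing behaviour when the loop does run. A reduced, self-contained sketch of the shape, with invented names:

int *findFirstGreater(int *Begin, int *End, int Threshold) {
  int *Current = Begin;       // previously uninitialized; now defined on every path
  while (Begin != End) {
    Current = Begin;
    if (*Current > Threshold)
      break;
    ++Begin;
  }
  return Current;             // well-defined even when the loop body never ran
}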
[llvm-branch-commits] [llvm] ed936aa - [InterleavedAccess] Return correct 'modified' status.
Author: Florian Hahn Date: 2021-01-04T15:49:47Z New Revision: ed936aad7814404b3cc767d4515096f078dfcbb9 URL: https://github.com/llvm/llvm-project/commit/ed936aad7814404b3cc767d4515096f078dfcbb9 DIFF: https://github.com/llvm/llvm-project/commit/ed936aad7814404b3cc767d4515096f078dfcbb9.diff LOG: [InterleavedAccess] Return correct 'modified' status. Both tryReplaceExtracts and replaceBinOpShuffles may modify the IR, even if no interleaved loads are generated, but currently the pass pretends no changes were made. This patch updates the pass to return true if either of the functions made any changes. In case of tryReplaceExtracts, changes are made if there are any Extracts and true is returned. `replaceBinOpShuffles` always makes changes if BinOpShuffles is not empty. It also always returned true, so I went ahead and change it to just `replaceBinOpShuffles`. Fixes PR48208. Reviewed By: SjoerdMeijer Differential Revision: https://reviews.llvm.org/D93997 Added: llvm/test/Transforms/InterleavedAccess/X86/interleave-load-extract-shuffle-changes.ll Modified: llvm/lib/CodeGen/InterleavedAccessPass.cpp Removed: diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp index 73771609a792..6e1621450755 100644 --- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -123,10 +123,11 @@ class InterleavedAccess : public FunctionPass { /// Given a number of shuffles of the form shuffle(binop(x,y)), convert them /// to binop(shuffle(x), shuffle(y)) to allow the formation of an /// interleaving load. Any newly created shuffles that operate on \p LI will - /// be added to \p Shuffles. - bool tryReplaceBinOpShuffles(ArrayRef BinOpShuffles, - SmallVectorImpl &Shuffles, - LoadInst *LI); + /// be added to \p Shuffles. Returns true, if any changes to the IR have been + /// made. + bool replaceBinOpShuffles(ArrayRef BinOpShuffles, +SmallVectorImpl &Shuffles, +LoadInst *LI); }; } // end anonymous namespace. @@ -369,14 +370,17 @@ bool InterleavedAccess::lowerInterleavedLoad( // use the shufflevector instructions instead of the load. if (!tryReplaceExtracts(Extracts, Shuffles)) return false; - if (!tryReplaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LI)) -return false; + + bool BinOpShuffleChanged = + replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LI); LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n"); // Try to create target specific intrinsics to replace the load and shuffles. - if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor)) -return false; + if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor)) { +// If Extracts is not empty, tryReplaceExtracts made changes earlier. 
+return !Extracts.empty() || BinOpShuffleChanged; + } for (auto SVI : Shuffles) DeadInsts.push_back(SVI); @@ -385,7 +389,7 @@ bool InterleavedAccess::lowerInterleavedLoad( return true; } -bool InterleavedAccess::tryReplaceBinOpShuffles( +bool InterleavedAccess::replaceBinOpShuffles( ArrayRef BinOpShuffles, SmallVectorImpl &Shuffles, LoadInst *LI) { for (auto *SVI : BinOpShuffles) { @@ -410,7 +414,8 @@ bool InterleavedAccess::tryReplaceBinOpShuffles( if (NewSVI2->getOperand(0) == LI) Shuffles.push_back(NewSVI2); } - return true; + + return !BinOpShuffles.empty(); } bool InterleavedAccess::tryReplaceExtracts( diff --git a/llvm/test/Transforms/InterleavedAccess/X86/interleave-load-extract-shuffle-changes.ll b/llvm/test/Transforms/InterleavedAccess/X86/interleave-load-extract-shuffle-changes.ll new file mode 100644 index ..80f3195699dc --- /dev/null +++ b/llvm/test/Transforms/InterleavedAccess/X86/interleave-load-extract-shuffle-changes.ll @@ -0,0 +1,58 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -interleaved-access -S %s | FileCheck %s + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.15.0" + +; No interleaved load instruction is generated, but the shuffle is moved just +; after the load. +define <2 x double> @shuffle_binop_fol(<4 x double>* %ptr) { +; CHECK-LABEL: @shuffle_binop_fol( +; CHECK-NEXT: vector.body.preheader: +; CHECK-NEXT:[[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[PTR:%.*]], align 8 +; CHECK-NEXT:[[EXTRACTED1:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> undef, <2 x i32> +; CHECK-NEXT:[[EXTRACTED2:%.*]] = shufflevector <4 x double> , <4 x double> undef, <2 x i32> +; CHECK-NEXT:[[FADD3:%.*]] = fadd <2 x double> [[EXTRACTED1]], [[EXTRACTED2]] +; CHECK-NEXT:ret <2 x do
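The contract at issue: a transform pass must report "modified" whenever it touched the IR, even on the path where the main lowering bails out, or cached analyses go stale. A small sketch of the control flow the patch moves to; the helper names are invented for illustration:

static bool replaceBinOpShuffles() { return true; }          // stands in for the IR-rewriting helpers
static bool targetLoweredInterleavedLoad() { return false; } // target hook that may refuse

bool lowerOneInterleavedLoad() {
  bool Changed = false;
  Changed |= replaceBinOpShuffles();   // may rewrite IR before lowering is attempted
  if (!targetLoweredInterleavedLoad())
    return Changed;                    // still report the earlier modifications
  return true;
}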
[llvm-branch-commits] [llvm] e2d3d50 - [RISCV][NFC] Add additional cmov tests
Author: Michael Munday Date: 2021-01-04T16:01:40Z New Revision: e2d3d501ef8b49eb8990dd3556948373b023cd48 URL: https://github.com/llvm/llvm-project/commit/e2d3d501ef8b49eb8990dd3556948373b023cd48 DIFF: https://github.com/llvm/llvm-project/commit/e2d3d501ef8b49eb8990dd3556948373b023cd48.diff LOG: [RISCV][NFC] Add additional cmov tests One or more cmov instructions could be generated for these functions when the Zbt extension is present. Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D93768 Added: Modified: llvm/test/CodeGen/RISCV/rv32Zbt.ll llvm/test/CodeGen/RISCV/rv64Zbt.ll Removed: diff --git a/llvm/test/CodeGen/RISCV/rv32Zbt.ll b/llvm/test/CodeGen/RISCV/rv32Zbt.ll index 68501812f2c6..7011698c13a9 100644 --- a/llvm/test/CodeGen/RISCV/rv32Zbt.ll +++ b/llvm/test/CodeGen/RISCV/rv32Zbt.ll @@ -86,15 +86,143 @@ define i32 @cmov_i32(i32 %a, i32 %b, i32 %c) nounwind { ret i32 %cond } +define i32 @cmov_sle_i32(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { +; RV32I-LABEL: cmov_sle_i32: +; RV32I: # %bb.0: +; RV32I-NEXT:bge a2, a1, .LBB3_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT:mv a3, a0 +; RV32I-NEXT: .LBB3_2: +; RV32I-NEXT:mv a0, a3 +; RV32I-NEXT:ret +; +; RV32IB-LABEL: cmov_sle_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT:bge a2, a1, .LBB3_2 +; RV32IB-NEXT: # %bb.1: +; RV32IB-NEXT:mv a3, a0 +; RV32IB-NEXT: .LBB3_2: +; RV32IB-NEXT:mv a0, a3 +; RV32IB-NEXT:ret +; +; RV32IBT-LABEL: cmov_sle_i32: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT:bge a2, a1, .LBB3_2 +; RV32IBT-NEXT: # %bb.1: +; RV32IBT-NEXT:mv a3, a0 +; RV32IBT-NEXT: .LBB3_2: +; RV32IBT-NEXT:mv a0, a3 +; RV32IBT-NEXT:ret + %tobool = icmp sle i32 %b, %c + %cond = select i1 %tobool, i32 %d, i32 %a + ret i32 %cond +} + +define i32 @cmov_sge_i32(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { +; RV32I-LABEL: cmov_sge_i32: +; RV32I: # %bb.0: +; RV32I-NEXT:bge a1, a2, .LBB4_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT:mv a3, a0 +; RV32I-NEXT: .LBB4_2: +; RV32I-NEXT:mv a0, a3 +; RV32I-NEXT:ret +; +; RV32IB-LABEL: cmov_sge_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT:bge a1, a2, .LBB4_2 +; RV32IB-NEXT: # %bb.1: +; RV32IB-NEXT:mv a3, a0 +; RV32IB-NEXT: .LBB4_2: +; RV32IB-NEXT:mv a0, a3 +; RV32IB-NEXT:ret +; +; RV32IBT-LABEL: cmov_sge_i32: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT:bge a1, a2, .LBB4_2 +; RV32IBT-NEXT: # %bb.1: +; RV32IBT-NEXT:mv a3, a0 +; RV32IBT-NEXT: .LBB4_2: +; RV32IBT-NEXT:mv a0, a3 +; RV32IBT-NEXT:ret + %tobool = icmp sge i32 %b, %c + %cond = select i1 %tobool, i32 %d, i32 %a + ret i32 %cond +} + +define i32 @cmov_ule_i32(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { +; RV32I-LABEL: cmov_ule_i32: +; RV32I: # %bb.0: +; RV32I-NEXT:bgeu a2, a1, .LBB5_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT:mv a3, a0 +; RV32I-NEXT: .LBB5_2: +; RV32I-NEXT:mv a0, a3 +; RV32I-NEXT:ret +; +; RV32IB-LABEL: cmov_ule_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT:bgeu a2, a1, .LBB5_2 +; RV32IB-NEXT: # %bb.1: +; RV32IB-NEXT:mv a3, a0 +; RV32IB-NEXT: .LBB5_2: +; RV32IB-NEXT:mv a0, a3 +; RV32IB-NEXT:ret +; +; RV32IBT-LABEL: cmov_ule_i32: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT:bgeu a2, a1, .LBB5_2 +; RV32IBT-NEXT: # %bb.1: +; RV32IBT-NEXT:mv a3, a0 +; RV32IBT-NEXT: .LBB5_2: +; RV32IBT-NEXT:mv a0, a3 +; RV32IBT-NEXT:ret + %tobool = icmp ule i32 %b, %c + %cond = select i1 %tobool, i32 %d, i32 %a + ret i32 %cond +} + +define i32 @cmov_uge_i32(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { +; RV32I-LABEL: cmov_uge_i32: +; RV32I: # %bb.0: +; RV32I-NEXT:bgeu a1, a2, .LBB6_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT:mv a3, a0 +; RV32I-NEXT: .LBB6_2: +; RV32I-NEXT:mv a0, a3 +; RV32I-NEXT:ret +; +; 
RV32IB-LABEL: cmov_uge_i32: +; RV32IB: # %bb.0: +; RV32IB-NEXT:bgeu a1, a2, .LBB6_2 +; RV32IB-NEXT: # %bb.1: +; RV32IB-NEXT:mv a3, a0 +; RV32IB-NEXT: .LBB6_2: +; RV32IB-NEXT:mv a0, a3 +; RV32IB-NEXT:ret +; +; RV32IBT-LABEL: cmov_uge_i32: +; RV32IBT: # %bb.0: +; RV32IBT-NEXT:bgeu a1, a2, .LBB6_2 +; RV32IBT-NEXT: # %bb.1: +; RV32IBT-NEXT:mv a3, a0 +; RV32IBT-NEXT: .LBB6_2: +; RV32IBT-NEXT:mv a0, a3 +; RV32IBT-NEXT:ret + %tobool = icmp uge i32 %b, %c + %cond = select i1 %tobool, i32 %d, i32 %a + ret i32 %cond +} + define i64 @cmov_i64(i64 %a, i64 %b, i64 %c) nounwind { ; RV32I-LABEL: cmov_i64: ; RV32I: # %bb.0: ; RV32I-NEXT:or a2, a2, a3 -; RV32I-NEXT:beqz a2, .LBB3_2 +; RV32I-NEXT:beqz a2, .LBB7_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT:mv a4, a0 ; RV32I-NEXT:mv a5, a1 -; RV32I-NEXT: .LBB3_2: +; RV32I-NEXT: .LBB7_2: ; RV32I-NEXT:mv a0, a4 ; RV32I-NEXT:mv a1, a5 ; RV32I-NEXT:ret @@ -117,6 +245,266 @@ define i64 @cmov_i64(i64 %a, i64 %b, i64 %c) nounwind { ret i6
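For reference, the new tests correspond roughly to source of this shape, a comparison feeding a select, which the Zbt extension could lower to a branchless cmov. This C++ is illustrative and not part of the patch:

int cmov_sle_i32(int a, int b, int c, int d) {
  return b <= c ? d : a;        // icmp sle + select
}
unsigned cmov_ule_i32(unsigned a, unsigned b, unsigned c, unsigned d) {
  return b <= c ? d : a;        // icmp ule + select
}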
[llvm-branch-commits] [llvm] c367258 - [SimplifyCFG] Enabled hoisting late in LTO pipeline.
Author: Florian Hahn Date: 2021-01-04T16:26:58Z New Revision: c367258b5cc257973f49508c1ac5763cb077428e URL: https://github.com/llvm/llvm-project/commit/c367258b5cc257973f49508c1ac5763cb077428e DIFF: https://github.com/llvm/llvm-project/commit/c367258b5cc257973f49508c1ac5763cb077428e.diff LOG: [SimplifyCFG] Enabled hoisting late in LTO pipeline. bb7d3af1139c disabled hoisting in SimplifyCFG by default, but enabled it late in the pipeline. But it appears as if the LTO pipelines got missed. This patch adjusts the LTO pipelines to also enable hoisting in the later stages. Unfortunately there's no easy way to add a test for the change I think. Reviewed By: lebedev.ri Differential Revision: https://reviews.llvm.org/D93684 Added: Modified: llvm/lib/Passes/PassBuilder.cpp llvm/lib/Transforms/IPO/PassManagerBuilder.cpp Removed: diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 95f58d9e3f733..96d96d43b4327 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1715,7 +1715,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, // are sorted out. MainFPM.addPass(InstCombinePass()); - MainFPM.addPass(SimplifyCFGPass()); + MainFPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true))); MainFPM.addPass(SCCPPass()); MainFPM.addPass(InstCombinePass()); MainFPM.addPass(BDCEPass()); @@ -1755,7 +1755,8 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, // Add late LTO optimization passes. // Delete basic blocks, which optimization passes may have killed. - MPM.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass())); + MPM.addPass(createModuleToFunctionPassAdaptor( + SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true; // Drop bodies of available eternally objects to improve GlobalDCE. MPM.addPass(EliminateAvailableExternallyPass()); diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index e7b5414a38414..33fa158e70ca9 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -1087,7 +1087,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // we may have exposed more scalar opportunities. Run parts of the scalar // optimizer again at this point. PM.add(createInstructionCombiningPass()); // Initial cleanup - PM.add(createCFGSimplificationPass()); // if-convert + PM.add(createCFGSimplificationPass(SimplifyCFGOptions() // if-convert + .hoistCommonInsts(true))); PM.add(createSCCPPass()); // Propagate exposed constants PM.add(createInstructionCombiningPass()); // Clean up again PM.add(createBitTrackingDCEPass()); @@ -1117,7 +1118,8 @@ void PassManagerBuilder::addLateLTOOptimizationPasses( PM.add(createHotColdSplittingPass()); // Delete basic blocks, which optimization passes may have killed. - PM.add(createCFGSimplificationPass()); + PM.add( + createCFGSimplificationPass(SimplifyCFGOptions().hoistCommonInsts(true))); // Drop bodies of available externally objects to improve GlobalDCE. PM.add(createEliminateAvailableExternallyPass()); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
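Downstream pipelines built by hand can opt into the same behaviour when they add the pass. A sketch for the new pass manager; the header paths are assumed from the monorepo layout:

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/SimplifyCFG.h"

void addLateCleanup(llvm::FunctionPassManager &FPM) {
  // Mirror the LTO pipelines above: allow hoisting of common instructions late.
  FPM.addPass(llvm::SimplifyCFGPass(
      llvm::SimplifyCFGOptions().hoistCommonInsts(true)));
}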
[llvm-branch-commits] [llvm] c55b609 - [Hexagon] Fix bad SDNodeXForm
Author: Krzysztof Parzyszek Date: 2021-01-04T10:43:01-06:00 New Revision: c55b609b777d59b4c174842ae87784626c2a4738 URL: https://github.com/llvm/llvm-project/commit/c55b609b777d59b4c174842ae87784626c2a4738 DIFF: https://github.com/llvm/llvm-project/commit/c55b609b777d59b4c174842ae87784626c2a4738.diff LOG: [Hexagon] Fix bad SDNodeXForm Fixes https://llvm.org/PR48651 Added: llvm/test/CodeGen/Hexagon/isel-splat-vector-neg-i8.ll Modified: llvm/lib/Target/Hexagon/HexagonPatterns.td Removed: diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td index fa91f7a31b14..d216c511a994 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatterns.td +++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td @@ -232,8 +232,8 @@ def NegImm32: SDNodeXFormgetZExtValue(); assert(isUInt<8>(V) || V >> 8 == 0xFF); - uint32_t S = V << 24 | V << 16 | V << 8 | V; V &= 0xFF; + uint32_t S = V << 24 | V << 16 | V << 8 | V; return CurDAG->getTargetConstant(S, SDLoc(N), MVT::i32); }]>; diff --git a/llvm/test/CodeGen/Hexagon/isel-splat-vector-neg-i8.ll b/llvm/test/CodeGen/Hexagon/isel-splat-vector-neg-i8.ll new file mode 100644 index ..9bd5bde81d94 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/isel-splat-vector-neg-i8.ll @@ -0,0 +1,16 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=hexagon < %s | FileCheck %s + +define <4 x i8> @fred() #0 { +; CHECK-LABEL: fred: +; CHECK: // %bb.0: +; CHECK-NEXT:{ +; CHECK-NEXT: r0 = ##-16843010 +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT:} + %v0 = insertelement <4 x i8> undef, i8 -2, i32 0 + %v1 = shufflevector <4 x i8> %v0, <4 x i8> undef, <4 x i32> zeroinitializer + ret <4 x i8> %v1 +} + +attributes #0 = { nounwind readnone } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
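The bug was purely an ordering one: the value was splatted into all four bytes before being masked to eight bits, so a negative input such as -2 leaked its sign-extension into the splat. A standalone reconstruction of the transform's arithmetic, illustrative rather than the actual SDNodeXForm:

#include <cassert>
#include <cstdint>

uint32_t splat8_buggy(uint64_t V) {
  uint32_t S = V << 24 | V << 16 | V << 8 | V;  // splat before masking (old order)
  V &= 0xFF;                                    // too late: S already holds sign bits
  return S;
}

uint32_t splat8_fixed(uint64_t V) {
  V &= 0xFF;                                    // mask first (the fix)
  return V << 24 | V << 16 | V << 8 | V;        // 0xFE -> 0xFEFEFEFE == -16843010
}

int main() {
  assert(splat8_fixed(0xFFFFFFFFFFFFFFFEULL) == 0xFEFEFEFEu);  // matches the new test
}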
[llvm-branch-commits] [openmp] 76bfbb7 - [libomptarget][amdgpu] Call into deviceRTL instead of ockl
Author: Jon Chesterfield Date: 2021-01-04T16:48:47Z New Revision: 76bfbb74d38b611f150e8e1a4becc11be95703da URL: https://github.com/llvm/llvm-project/commit/76bfbb74d38b611f150e8e1a4becc11be95703da DIFF: https://github.com/llvm/llvm-project/commit/76bfbb74d38b611f150e8e1a4becc11be95703da.diff LOG: [libomptarget][amdgpu] Call into deviceRTL instead of ockl [libomptarget][amdgpu] Call into deviceRTL instead of ockl Amdgpu codegen presently emits a call into ockl. The same functionality is already present in the deviceRTL. Adds an amdgpu specific entry point to avoid the dependency. This lets simple openmp code (specifically, that which doesn't use libm) run without rocm device libraries installed. Reviewed By: ronlieb Differential Revision: https://reviews.llvm.org/D93356 Added: Modified: clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp clang/test/OpenMP/amdgcn_target_codegen.cpp openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip Removed: diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp index ccffdf43549f..33d4ab838af1 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp @@ -49,13 +49,12 @@ llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUThreadID(CodeGenFunction &CGF) { llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUNumThreads(CodeGenFunction &CGF) { CGBuilderTy &Bld = CGF.Builder; llvm::Module *M = &CGF.CGM.getModule(); - const char *LocSize = "__ockl_get_local_size"; + const char *LocSize = "__kmpc_amdgcn_gpu_num_threads"; llvm::Function *F = M->getFunction(LocSize); if (!F) { F = llvm::Function::Create( -llvm::FunctionType::get(CGF.Int64Ty, {CGF.Int32Ty}, false), +llvm::FunctionType::get(CGF.Int32Ty, llvm::None, false), llvm::GlobalVariable::ExternalLinkage, LocSize, &CGF.CGM.getModule()); } - return Bld.CreateTrunc( - Bld.CreateCall(F, {Bld.getInt32(0)}, "nvptx_num_threads"), CGF.Int32Ty); + return Bld.CreateCall(F, llvm::None, "nvptx_num_threads"); } diff --git a/clang/test/OpenMP/amdgcn_target_codegen.cpp b/clang/test/OpenMP/amdgcn_target_codegen.cpp index 85ef69942a50..416ed06083b0 100644 --- a/clang/test/OpenMP/amdgcn_target_codegen.cpp +++ b/clang/test/OpenMP/amdgcn_target_codegen.cpp @@ -13,9 +13,8 @@ int test_amdgcn_target_tid_threads() { int arr[N]; -// CHECK: [[NUM_THREADS:%.+]] = call i64 @__ockl_get_local_size(i32 0) -// CHECK-NEXT: [[VAR:%.+]] = trunc i64 [[NUM_THREADS]] to i32 -// CHECK-NEXT: sub nuw i32 [[VAR]], 64 +// CHECK: [[NUM_THREADS:%.+]] = call i32 @__kmpc_amdgcn_gpu_num_threads() +// CHECK: sub nuw i32 [[NUM_THREADS]], 64 // CHECK: call i32 @llvm.amdgcn.workitem.id.x() #pragma omp target for (int i = 0; i < N; i++) { @@ -30,9 +29,8 @@ int test_amdgcn_target_tid_threads_simd() { int arr[N]; -// CHECK: [[NUM_THREADS:%.+]] = call i64 @__ockl_get_local_size(i32 0) -// CHECK-NEXT: [[VAR:%.+]] = trunc i64 [[NUM_THREADS]] to i32 -// CHECK-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[VAR]], i16 0) +// CHECK: [[NUM_THREADS:%.+]] = call i32 @__kmpc_amdgcn_gpu_num_threads() +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[NUM_THREADS]], i16 0) #pragma omp target simd for (int i = 0; i < N; i++) { arr[i] = 1; diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h index f7c75c09362a..80409d611f6f 100644 --- a/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h +++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h 
@@ -15,4 +15,6 @@ typedef uint64_t __kmpc_impl_lanemask_t; typedef uint32_t omp_lock_t; /* arbitrary type of the right length */ +EXTERN uint32_t __kmpc_amdgcn_gpu_num_threads(); + #endif diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip index 9fbdc67b56ab..3e70beb85d5b 100644 --- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip +++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip @@ -144,6 +144,10 @@ DEVICE unsigned GetLaneId() { return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u)); } +EXTERN uint32_t __kmpc_amdgcn_gpu_num_threads() { + return GetNumberOfThreadsInBlock(); +} + // Stub implementations -DEVICE void *__kmpc_impl_malloc(size_t ) { return nullptr } +DEVICE void *__kmpc_impl_malloc(size_t) { return nullptr; } DEVICE void __kmpc_impl_free(void *) {} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] f7463ca - [ProfileData] GCOVFile::readGCNO - silence undefined pointer warning. NFCI.
Author: Simon Pilgrim Date: 2021-01-04T16:50:05Z New Revision: f7463ca3cc5ba8455c4611c5afa79c48d8a79326 URL: https://github.com/llvm/llvm-project/commit/f7463ca3cc5ba8455c4611c5afa79c48d8a79326 DIFF: https://github.com/llvm/llvm-project/commit/f7463ca3cc5ba8455c4611c5afa79c48d8a79326.diff LOG: [ProfileData] GCOVFile::readGCNO - silence undefined pointer warning. NFCI. Silence clang static analyzer warning that 'fn' could still be in an undefined state - this shouldn't happen depending on the likely tag order, but the analyzer can't know that. Added: Modified: llvm/lib/ProfileData/GCOV.cpp Removed: diff --git a/llvm/lib/ProfileData/GCOV.cpp b/llvm/lib/ProfileData/GCOV.cpp index 2e1ba3338394..3332a898603b 100644 --- a/llvm/lib/ProfileData/GCOV.cpp +++ b/llvm/lib/ProfileData/GCOV.cpp @@ -111,7 +111,7 @@ bool GCOVFile::readGCNO(GCOVBuffer &buf) { buf.getWord(); // hasUnexecutedBlocks uint32_t tag, length; - GCOVFunction *fn; + GCOVFunction *fn = nullptr; while ((tag = buf.getWord())) { if (!buf.readInt(length)) return false; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [openmp] fe5d51a - [OpenMP] Add using bit flags to select Libomptarget Information
Author: Joseph Huber Date: 2021-01-04T12:03:15-05:00 New Revision: fe5d51a4897c26696fede55e120c912df60cd3f4 URL: https://github.com/llvm/llvm-project/commit/fe5d51a4897c26696fede55e120c912df60cd3f4 DIFF: https://github.com/llvm/llvm-project/commit/fe5d51a4897c26696fede55e120c912df60cd3f4.diff LOG: [OpenMP] Add using bit flags to select Libomptarget Information Summary: This patch adds more fine-grained support over which information is output from the libomptarget runtime when run with the environment variable LIBOMPTARGET_INFO set. An extensible set of flags can be used to pick and choose which information the user is interested in. Reviewers: jdoerfert JonChesterfield grokos Differential Revision: https://reviews.llvm.org/D93727 Added: Modified: openmp/libomptarget/include/Debug.h openmp/libomptarget/include/SourceInfo.h openmp/libomptarget/plugins/cuda/src/rtl.cpp openmp/libomptarget/src/device.cpp openmp/libomptarget/src/interface.cpp openmp/libomptarget/src/private.h openmp/libomptarget/test/offloading/info.c Removed: diff --git a/openmp/libomptarget/include/Debug.h b/openmp/libomptarget/include/Debug.h index 4f42794e1bca..de593ecf5c3e 100644 --- a/openmp/libomptarget/include/Debug.h +++ b/openmp/libomptarget/include/Debug.h @@ -37,24 +37,38 @@ #ifndef _OMPTARGET_DEBUG_H #define _OMPTARGET_DEBUG_H -static inline int getInfoLevel() { - static int InfoLevel = -1; - if (InfoLevel >= 0) -return InfoLevel; - - if (char *EnvStr = getenv("LIBOMPTARGET_INFO")) -InfoLevel = std::stoi(EnvStr); +#include + +/// 32-Bit field data attributes controlling information presented to the user. +enum OpenMPInfoType : uint32_t { + // Print data arguments and attributes upon entering an OpenMP device kernel. + OMP_INFOTYPE_KERNEL_ARGS = 0x0001, + // Indicate when an address already exists in the device mapping table. + OMP_INFOTYPE_MAPPING_EXISTS = 0x0002, + // Dump the contents of the device pointer map at kernel exit or failure. + OMP_INFOTYPE_DUMP_TABLE = 0x0004, + // Print kernel information from target device plugins + OMP_INFOTYPE_PLUGIN_KERNEL = 0x0010, +}; + +static inline uint32_t getInfoLevel() { + static uint32_t InfoLevel = 0; + static std::once_flag Flag{}; + std::call_once(Flag, []() { +if (char *EnvStr = getenv("LIBOMPTARGET_INFO")) + InfoLevel = std::stoi(EnvStr); + }); return InfoLevel; } -static inline int getDebugLevel() { - static int DebugLevel = -1; - if (DebugLevel >= 0) -return DebugLevel; - - if (char *EnvStr = getenv("LIBOMPTARGET_DEBUG")) -DebugLevel = std::stoi(EnvStr); +static inline uint32_t getDebugLevel() { + static uint32_t DebugLevel = 0; + static std::once_flag Flag{}; + std::call_once(Flag, []() { +if (char *EnvStr = getenv("LIBOMPTARGET_DEBUG")) + DebugLevel = std::stoi(EnvStr); + }); return DebugLevel; } @@ -107,7 +121,7 @@ static inline int getDebugLevel() { /// Print a generic information string used if LIBOMPTARGET_INFO=1 #define INFO_MESSAGE(_num, ...) 
\ do { \ -fprintf(stderr, GETNAME(TARGET_NAME) " device %d info: ", _num); \ +fprintf(stderr, GETNAME(TARGET_NAME) " device %d info: ", (int)_num); \ fprintf(stderr, __VA_ARGS__); \ } while (0) diff --git a/openmp/libomptarget/include/SourceInfo.h b/openmp/libomptarget/include/SourceInfo.h index 614f99e62afe..c659d916837b 100644 --- a/openmp/libomptarget/include/SourceInfo.h +++ b/openmp/libomptarget/include/SourceInfo.h @@ -54,6 +54,13 @@ class SourceInfo { return std::string(reinterpret_cast(name)); } + std::string initStr(const ident_t *loc) { +if (!loc) + return ";unknown;unknown;0;0;;"; +else + return std::string(reinterpret_cast(loc->psource)); + } + /// Get n-th substring in an expression separated by ;. std::string getSubstring(const int n) const { std::size_t begin = sourceStr.find(';'); @@ -73,7 +80,7 @@ class SourceInfo { public: SourceInfo(const ident_t *loc) - : sourceStr(initStr(loc->psource)), name(getSubstring(1)), + : sourceStr(initStr(loc)), name(getSubstring(1)), filename(removePath(getSubstring(0))), line(std::stoi(getSubstring(2))), column(std::stoi(getSubstring(3))) {} diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp index 0422bfbfe319..4fac6a76710e 100644 --- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp +++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp @@ -501,11 +501,12 @@ class DeviceRTLTy { DeviceData[DeviceId].BlocksPerGrid = EnvTeamLimit; } -INFO(DeviceId, - "Device supports up to %d CUDA blocks and %d threads with a " -
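With the bit-field scheme, users combine the flags numerically in LIBOMPTARGET_INFO instead of picking a single verbosity level. A small sketch composing a value from the constants defined in the Debug.h hunk above; the specific combination is only an example:

#include <cstdint>
#include <cstdio>

// Values copied from the Debug.h hunk above.
enum : uint32_t {
  OMP_INFOTYPE_KERNEL_ARGS    = 0x0001,
  OMP_INFOTYPE_MAPPING_EXISTS = 0x0002,
  OMP_INFOTYPE_DUMP_TABLE     = 0x0004,
  OMP_INFOTYPE_PLUGIN_KERNEL  = 0x0010,
};

int main() {
  // e.g. request kernel-argument and plugin kernel information only:
  uint32_t Info = OMP_INFOTYPE_KERNEL_ARGS | OMP_INFOTYPE_PLUGIN_KERNEL;
  std::printf("export LIBOMPTARGET_INFO=%u\n", Info);  // prints 17
}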
[llvm-branch-commits] [llvm] dd6bb36 - [LoopDeletion] Break backedge of loops when known not taken
Author: Philip Reames Date: 2021-01-04T09:19:29-08:00 New Revision: dd6bb367d19e3bf18353e40de54d35480999a930 URL: https://github.com/llvm/llvm-project/commit/dd6bb367d19e3bf18353e40de54d35480999a930 DIFF: https://github.com/llvm/llvm-project/commit/dd6bb367d19e3bf18353e40de54d35480999a930.diff LOG: [LoopDeletion] Break backedge of loops when known not taken The basic idea is that if SCEV can prove the backedge isn't taken, we can go ahead and get rid of the backedge (and thus the loop) while leaving the rest of the control in place. This nicely handles cases with dispatch between multiple exits and internal side effects. Differential Revision: https://reviews.llvm.org/D93906 Added: llvm/test/Transforms/LoopDeletion/zero-btc.ll Modified: llvm/include/llvm/Transforms/Utils/LoopUtils.h llvm/lib/Transforms/Scalar/LoopDeletion.cpp llvm/lib/Transforms/Utils/LoopUtils.cpp llvm/test/Transforms/IndVarSimplify/X86/pr45360.ll llvm/test/Transforms/IndVarSimplify/exit_value_test2.ll llvm/test/Transforms/LoopDeletion/update-scev.ll Removed: diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index b29add4cba0e5..82c0d9e070d78 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -179,6 +179,12 @@ bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, LoopInfo *LI, MemorySSA *MSSA = nullptr); +/// Remove the backedge of the specified loop. Handles loop nests and general +/// loop structures subject to the precondition that the loop has a single +/// latch block. Preserves all listed analyses. +void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, + LoopInfo &LI, MemorySSA *MSSA); + /// Try to promote memory values to scalars by sinking stores out of /// the loop and moving loads to before the loop. We do this by looping over /// the stores in the loop, looking for stores to Must pointers which are diff --git a/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/llvm/lib/Transforms/Scalar/LoopDeletion.cpp index 065db647561ec..04120032f0f41 100644 --- a/llvm/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/llvm/lib/Transforms/Scalar/LoopDeletion.cpp @@ -26,6 +26,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/LoopUtils.h" + using namespace llvm; #define DEBUG_TYPE "loop-delete" @@ -38,6 +39,14 @@ enum class LoopDeletionResult { Deleted, }; +static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B) { + if (A == LoopDeletionResult::Deleted || B == LoopDeletionResult::Deleted) +return LoopDeletionResult::Deleted; + if (A == LoopDeletionResult::Modified || B == LoopDeletionResult::Modified) +return LoopDeletionResult::Modified; + return LoopDeletionResult::Unmodified; +} + /// Determines if a loop is dead. /// /// This assumes that we've already checked for unique exit and exiting blocks, @@ -126,6 +135,26 @@ static bool isLoopNeverExecuted(Loop *L) { return true; } +/// If we can prove the backedge is untaken, remove it. This destroys the +/// loop, but leaves the (now trivially loop invariant) control flow and +/// side effects (if any) in place. 
+static LoopDeletionResult +breakBackedgeIfNotTaken(Loop *L, DominatorTree &DT, ScalarEvolution &SE, +LoopInfo &LI, MemorySSA *MSSA, +OptimizationRemarkEmitter &ORE) { + assert(L->isLCSSAForm(DT) && "Expected LCSSA!"); + + if (!L->getLoopLatch()) +return LoopDeletionResult::Unmodified; + + auto *BTC = SE.getBackedgeTakenCount(L); + if (!BTC->isZero()) +return LoopDeletionResult::Unmodified; + + breakLoopBackedge(L, DT, SE, LI, MSSA); + return LoopDeletionResult::Deleted; +} + /// Remove a loop if it is dead. /// /// A loop is considered dead if it does not impact the observable behavior of @@ -162,7 +191,6 @@ static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT, return LoopDeletionResult::Unmodified; } - BasicBlock *ExitBlock = L->getUniqueExitBlock(); if (ExitBlock && isLoopNeverExecuted(L)) { @@ -240,6 +268,14 @@ PreservedAnalyses LoopDeletionPass::run(Loop &L, LoopAnalysisManager &AM, // but ORE cannot be preserved (see comment before the pass definition). OptimizationRemarkEmitter ORE(L.getHeader()->getParent()); auto Result = deleteLoopIfDead(&L, AR.DT, AR.SE, AR.LI, AR.MSSA, ORE); + + // If we can prove the backedge isn't taken, just break it and be done. This + // leaves the loop structure in place which means it can handle dispatching + // to the right exit based on whatever loop invariant structure remains. + if (Result != LoopDeletionR
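The heart of the change is a single SCEV query: when the backedge-taken count is provably zero, the latch's backedge can be removed and the loop dissolves while its body and exit dispatch stay in place. A fragment of how a loop pass holding the usual analyses might use the new utility; note the revert in the next message:

// Fragment; assumes a loop pass that already has DT, SE, LI and (optionally) MSSA.
if (L->getLoopLatch()) {                            // single-latch precondition
  const llvm::SCEV *BTC = SE.getBackedgeTakenCount(L);
  if (BTC->isZero()) {
    llvm::breakLoopBackedge(L, DT, SE, LI, MSSA);   // utility added by this patch
    // The loop is gone; the remaining control flow is now trivially loop-invariant.
  }
}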
[llvm-branch-commits] [llvm] 7c63aac - Revert "[LoopDeletion] Break backedge of loops when known not taken"
Author: Philip Reames Date: 2021-01-04T09:50:47-08:00 New Revision: 7c63aac7bd4e5ce3402f2ef7c1d5b66047230147 URL: https://github.com/llvm/llvm-project/commit/7c63aac7bd4e5ce3402f2ef7c1d5b66047230147 DIFF: https://github.com/llvm/llvm-project/commit/7c63aac7bd4e5ce3402f2ef7c1d5b66047230147.diff LOG: Revert "[LoopDeletion] Break backedge of loops when known not taken" This reverts commit dd6bb367d19e3bf18353e40de54d35480999a930. Multi-stage builders are showing an assertion failure w/LCSSA not being preserved on entry to IndVars. Reason isn't clear, reverting while investigating. Added: Modified: llvm/include/llvm/Transforms/Utils/LoopUtils.h llvm/lib/Transforms/Scalar/LoopDeletion.cpp llvm/lib/Transforms/Utils/LoopUtils.cpp llvm/test/Transforms/IndVarSimplify/X86/pr45360.ll llvm/test/Transforms/IndVarSimplify/exit_value_test2.ll llvm/test/Transforms/LoopDeletion/update-scev.ll Removed: llvm/test/Transforms/LoopDeletion/zero-btc.ll diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index 82c0d9e070d7..b29add4cba0e 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -179,12 +179,6 @@ bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, LoopInfo *LI, MemorySSA *MSSA = nullptr); -/// Remove the backedge of the specified loop. Handles loop nests and general -/// loop structures subject to the precondition that the loop has a single -/// latch block. Preserves all listed analyses. -void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, - LoopInfo &LI, MemorySSA *MSSA); - /// Try to promote memory values to scalars by sinking stores out of /// the loop and moving loads to before the loop. We do this by looping over /// the stores in the loop, looking for stores to Must pointers which are diff --git a/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/llvm/lib/Transforms/Scalar/LoopDeletion.cpp index 04120032f0f4..065db647561e 100644 --- a/llvm/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/llvm/lib/Transforms/Scalar/LoopDeletion.cpp @@ -26,7 +26,6 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/LoopUtils.h" - using namespace llvm; #define DEBUG_TYPE "loop-delete" @@ -39,14 +38,6 @@ enum class LoopDeletionResult { Deleted, }; -static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B) { - if (A == LoopDeletionResult::Deleted || B == LoopDeletionResult::Deleted) -return LoopDeletionResult::Deleted; - if (A == LoopDeletionResult::Modified || B == LoopDeletionResult::Modified) -return LoopDeletionResult::Modified; - return LoopDeletionResult::Unmodified; -} - /// Determines if a loop is dead. /// /// This assumes that we've already checked for unique exit and exiting blocks, @@ -135,26 +126,6 @@ static bool isLoopNeverExecuted(Loop *L) { return true; } -/// If we can prove the backedge is untaken, remove it. This destroys the -/// loop, but leaves the (now trivially loop invariant) control flow and -/// side effects (if any) in place. 
-static LoopDeletionResult -breakBackedgeIfNotTaken(Loop *L, DominatorTree &DT, ScalarEvolution &SE, -LoopInfo &LI, MemorySSA *MSSA, -OptimizationRemarkEmitter &ORE) { - assert(L->isLCSSAForm(DT) && "Expected LCSSA!"); - - if (!L->getLoopLatch()) -return LoopDeletionResult::Unmodified; - - auto *BTC = SE.getBackedgeTakenCount(L); - if (!BTC->isZero()) -return LoopDeletionResult::Unmodified; - - breakLoopBackedge(L, DT, SE, LI, MSSA); - return LoopDeletionResult::Deleted; -} - /// Remove a loop if it is dead. /// /// A loop is considered dead if it does not impact the observable behavior of @@ -191,6 +162,7 @@ static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT, return LoopDeletionResult::Unmodified; } + BasicBlock *ExitBlock = L->getUniqueExitBlock(); if (ExitBlock && isLoopNeverExecuted(L)) { @@ -268,14 +240,6 @@ PreservedAnalyses LoopDeletionPass::run(Loop &L, LoopAnalysisManager &AM, // but ORE cannot be preserved (see comment before the pass definition). OptimizationRemarkEmitter ORE(L.getHeader()->getParent()); auto Result = deleteLoopIfDead(&L, AR.DT, AR.SE, AR.LI, AR.MSSA, ORE); - - // If we can prove the backedge isn't taken, just break it and be done. This - // leaves the loop structure in place which means it can handle dispatching - // to the right exit based on whatever loop invariant structure remains. - if (Result != LoopDeletionResult::Deleted) -Result = merge(Result, breakBackedgeIfNotTaken(&L, AR.DT, AR.SE, AR.LI, -
[llvm-branch-commits] [llvm] d8938c8 - CodeGen: Use Register
Author: Matt Arsenault Date: 2021-01-04T12:53:06-05:00 New Revision: d8938c8bb5479b168d27d3e161cb3a53e8ff09f0 URL: https://github.com/llvm/llvm-project/commit/d8938c8bb5479b168d27d3e161cb3a53e8ff09f0 DIFF: https://github.com/llvm/llvm-project/commit/d8938c8bb5479b168d27d3e161cb3a53e8ff09f0.diff LOG: CodeGen: Use Register Added: Modified: llvm/include/llvm/CodeGen/MachineFrameInfo.h llvm/lib/Target/RISCV/RISCVFrameLowering.cpp Removed: diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h index 5cd7f9cde674..7f0ec0df57c5 100644 --- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h +++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h @@ -14,6 +14,7 @@ #define LLVM_CODEGEN_MACHINEFRAMEINFO_H #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/Register.h" #include "llvm/Support/Alignment.h" #include "llvm/Support/DataTypes.h" #include @@ -31,7 +32,7 @@ class AllocaInst; /// Callee saved reg can also be saved to a diff erent register rather than /// on the stack by setting DstReg instead of FrameIdx. class CalleeSavedInfo { - unsigned Reg; + Register Reg; union { int FrameIdx; unsigned DstReg; @@ -58,14 +59,14 @@ class CalleeSavedInfo { : Reg(R), FrameIdx(FI), Restored(true), SpilledToReg(false) {} // Accessors. - unsigned getReg()const { return Reg; } + Register getReg()const { return Reg; } int getFrameIdx()const { return FrameIdx; } unsigned getDstReg() const { return DstReg; } void setFrameIdx(int FI) { FrameIdx = FI; SpilledToReg = false; } - void setDstReg(unsigned SpillReg) { + void setDstReg(Register SpillReg) { DstReg = SpillReg; SpilledToReg = true; } diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 3dd68fa36d45..26ff6ddcd048 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -138,7 +138,7 @@ static int getLibCallID(const MachineFunction &MF, // RISCVRegisterInfo::hasReservedSpillSlot assigns negative frame indexes to // registers which can be saved by libcall. if (CS.getFrameIdx() < 0) - MaxReg = std::max(MaxReg.id(), CS.getReg()); + MaxReg = std::max(MaxReg.id(), CS.getReg().id()); if (MaxReg == RISCV::NoRegister) return -1; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
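The practical wrinkle: Register converts to and from unsigned, but std::max deduces one type for both arguments, so the comparison is spelled with .id() on both sides. A fragment mirroring the RISCVFrameLowering hunk, assuming <algorithm> and the surrounding loop over the callee-saved info list:

llvm::Register MaxReg;                      // default-constructed: no register
for (const llvm::CalleeSavedInfo &CS : CSI)
  if (CS.getFrameIdx() < 0)                 // slots reserved for libcall-saved regs
    MaxReg = std::max(MaxReg.id(), CS.getReg().id());  // compare raw ids, same type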
[llvm-branch-commits] [llvm] 6976812 - [InstCombine] add tests for ashr+icmp; NFC
Author: Sanjay Patel Date: 2021-01-04T13:35:07-05:00 New Revision: 6976812129bf62975e37f6eabced717dcd090037 URL: https://github.com/llvm/llvm-project/commit/6976812129bf62975e37f6eabced717dcd090037 DIFF: https://github.com/llvm/llvm-project/commit/6976812129bf62975e37f6eabced717dcd090037.diff LOG: [InstCombine] add tests for ashr+icmp; NFC Added: Modified: llvm/test/Transforms/InstCombine/icmp-shr.ll Removed: diff --git a/llvm/test/Transforms/InstCombine/icmp-shr.ll b/llvm/test/Transforms/InstCombine/icmp-shr.ll index 214f315f3178..22f61d2d5e6a 100644 --- a/llvm/test/Transforms/InstCombine/icmp-shr.ll +++ b/llvm/test/Transforms/InstCombine/icmp-shr.ll @@ -5,7 +5,7 @@ target datalayout = "e-p:64:64:64-p1:16:16:16-p2:32:32:32-p3:64:64:64-i1:8:8-i8: define i1 @lshr_eq_msb_low_last_zero(i8 %a) { ; CHECK-LABEL: @lshr_eq_msb_low_last_zero( -; CHECK-NEXT:[[CMP:%.*]] = icmp ugt i8 %a, 6 +; CHECK-NEXT:[[CMP:%.*]] = icmp ugt i8 [[A:%.*]], 6 ; CHECK-NEXT:ret i1 [[CMP]] ; %shr = lshr i8 127, %a @@ -15,7 +15,7 @@ define i1 @lshr_eq_msb_low_last_zero(i8 %a) { define <2 x i1> @lshr_eq_msb_low_last_zero_vec(<2 x i8> %a) { ; CHECK-LABEL: @lshr_eq_msb_low_last_zero_vec( -; CHECK-NEXT:[[CMP:%.*]] = icmp ugt <2 x i8> %a, +; CHECK-NEXT:[[CMP:%.*]] = icmp ugt <2 x i8> [[A:%.*]], ; CHECK-NEXT:ret <2 x i1> [[CMP]] ; %shr = lshr <2 x i8> , %a @@ -25,7 +25,7 @@ define <2 x i1> @lshr_eq_msb_low_last_zero_vec(<2 x i8> %a) { define i1 @ashr_eq_msb_low_second_zero(i8 %a) { ; CHECK-LABEL: @ashr_eq_msb_low_second_zero( -; CHECK-NEXT:[[CMP:%.*]] = icmp ugt i8 %a, 6 +; CHECK-NEXT:[[CMP:%.*]] = icmp ugt i8 [[A:%.*]], 6 ; CHECK-NEXT:ret i1 [[CMP]] ; %shr = ashr i8 127, %a @@ -35,7 +35,7 @@ define i1 @ashr_eq_msb_low_second_zero(i8 %a) { define i1 @lshr_ne_msb_low_last_zero(i8 %a) { ; CHECK-LABEL: @lshr_ne_msb_low_last_zero( -; CHECK-NEXT:[[CMP:%.*]] = icmp ult i8 %a, 7 +; CHECK-NEXT:[[CMP:%.*]] = icmp ult i8 [[A:%.*]], 7 ; CHECK-NEXT:ret i1 [[CMP]] ; %shr = lshr i8 127, %a @@ -45,7 +45,7 @@ define i1 @lshr_ne_msb_low_last_zero(i8 %a) { define i1 @ashr_ne_msb_low_second_zero(i8 %a) { ; CHECK-LABEL: @ashr_ne_msb_low_second_zero( -; CHECK-NEXT:[[CMP:%.*]] = icmp ult i8 %a, 7 +; CHECK-NEXT:[[CMP:%.*]] = icmp ult i8 [[A:%.*]], 7 ; CHECK-NEXT:ret i1 [[CMP]] ; %shr = ashr i8 127, %a @@ -55,7 +55,7 @@ define i1 @ashr_ne_msb_low_second_zero(i8 %a) { define i1 @ashr_eq_both_equal(i8 %a) { ; CHECK-LABEL: @ashr_eq_both_equal( -; CHECK-NEXT:[[CMP:%.*]] = icmp eq i8 %a, 0 +; CHECK-NEXT:[[CMP:%.*]] = icmp eq i8 [[A:%.*]], 0 ; CHECK-NEXT:ret i1 [[CMP]] ; %shr = ashr i8 128, %a @@ -65,7 +65,7 @@ define i1 @ashr_eq_both_equal(i8 %a) { define i1 @ashr_ne_both_equal(i8 %a) { ; CHECK-LABEL: @ashr_ne_both_equal( -; CHECK-NEXT:[[CMP:%.*]] = icmp ne i8 %a, 0 +; CHECK-NEXT:[[CMP:%.*]] = icmp ne i8 [[A:%.*]], 0 ; CHECK-NEXT:ret i1 [[CMP]] ; %shr = ashr i8 128, %a @@ -75,7 +75,7 @@ define i1 @ashr_ne_both_equal(i8 %a) { define i1 @lshr_eq_both_equal(i8 %a) { ; CHECK-LABEL: @lshr_eq_both_equal( -; CHECK-NEXT:[[CMP:%.*]] = icmp eq i8 %a, 0 +; CHECK-NEXT:[[CMP:%.*]] = icmp eq i8 [[A:%.*]], 0 ; CHECK-NEXT:ret i1 [[CMP]] ; %shr = lshr i8 127, %a @@ -85,7 +85,7 @@ define i1 @lshr_eq_both_equal(i8 %a) { define i1 @lshr_ne_both_equal(i8 %a) { ; CHECK-LABEL: @lshr_ne_both_equal( -; CHECK-NEXT:[[CMP:%.*]] = icmp ne i8 %a, 0 +; CHECK-NEXT:[[CMP:%.*]] = icmp ne i8 [[A:%.*]], 0 ; CHECK-NEXT:ret i1 [[CMP]] ; %shr = lshr i8 127, %a @@ -95,7 +95,7 @@ define i1 @lshr_ne_both_equal(i8 %a) { define i1 @exact_ashr_eq_both_equal(i8 %a) { ; CHECK-LABEL: 
@exact_ashr_eq_both_equal( -; CHECK-NEXT:[[CMP:%.*]] = icmp eq i8 %a, 0 +; CHECK-NEXT:[[CMP:%.*]] = icmp eq i8 [[A:%.*]], 0 ; CHECK-NEXT:ret i1 [[CMP]] ; %shr = ashr exact i8 128, %a @@ -105,7 +105,7 @@ define i1 @exact_ashr_eq_both_equal(i8 %a) { define i1 @exact_ashr_ne_both_equal(i8 %a) { ; CHECK-LABEL: @exact_ashr_ne_both_equal( -; CHECK-NEXT:[[CMP:%.*]] = icmp ne i8 %a, 0 +; CHECK-NEXT:[[CMP:%.*]] = icmp ne i8 [[A:%.*]], 0 ; CHECK-NEXT:ret i1 [[CMP]] ; %shr = ashr exact i8 128, %a @@ -115,7 +115,7 @@ define i1 @exact_ashr_ne_both_equal(i8 %a) { define i1 @exact_lshr_eq_both_equal(i8 %a) { ; CHECK-LABEL: @exact_lshr_eq_both_equal( -; CHECK-NEXT:[[CMP:%.*]] = icmp eq i8 %a, 0 +; CHECK-NEXT:[[CMP:%.*]] = icmp eq i8 [[A:%.*]], 0 ; CHECK-NEXT:ret i1 [[CMP]] ; %shr = lshr exact i8 126, %a @@ -125,7 +125,7 @@ define i1 @exact_lshr_eq_both_equal(i8 %a) { define i1 @exact_lshr_ne_both_equal(i8 %a) { ; CHECK-LABEL: @exact_lshr_ne_both_equal( -; CHECK-NEXT:[[CMP:%.*]] = icmp ne i8 %a, 0 +; CHECK-NEXT:[[CMP:%.*]] = icmp ne i8 [[A:%.*]], 0 ; CHECK-NEXT:ret i1 [[CMP]] ;
[llvm-branch-commits] [llvm] dc9ac0e - [RISCV] Replace i32 with XLenVT in (add AddrFI, simm12) isel patterns.
Author: Craig Topper Date: 2021-01-04T10:53:27-08:00 New Revision: dc9ac0e8207654e9ad57e7135276c04fdadbe36f URL: https://github.com/llvm/llvm-project/commit/dc9ac0e8207654e9ad57e7135276c04fdadbe36f DIFF: https://github.com/llvm/llvm-project/commit/dc9ac0e8207654e9ad57e7135276c04fdadbe36f.diff LOG: [RISCV] Replace i32 with XLenVT in (add AddrFI, simm12) isel patterns. With the i32 these patterns will only fire on RV32, but they don't look RV32 specific. Reviewed By: lenary Differential Revision: https://reviews.llvm.org/D93843 Added: Modified: llvm/lib/Target/RISCV/RISCVInstrInfo.td llvm/test/CodeGen/RISCV/vararg.ll Removed: diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 04e45f495e22..e31cbd37877d 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -893,10 +893,10 @@ def PseudoAddTPRel : Pseudo<(outs GPR:$rd), /// FrameIndex calculations -def : Pat<(add (i32 AddrFI:$Rs), simm12:$imm12), - (ADDI (i32 AddrFI:$Rs), simm12:$imm12)>; -def : Pat<(IsOrAdd (i32 AddrFI:$Rs), simm12:$imm12), - (ADDI (i32 AddrFI:$Rs), simm12:$imm12)>; +def : Pat<(add (XLenVT AddrFI:$Rs), simm12:$imm12), + (ADDI (XLenVT AddrFI:$Rs), simm12:$imm12)>; +def : Pat<(IsOrAdd (XLenVT AddrFI:$Rs), simm12:$imm12), + (ADDI (XLenVT AddrFI:$Rs), simm12:$imm12)>; /// Setcc diff --git a/llvm/test/CodeGen/RISCV/vararg.ll b/llvm/test/CodeGen/RISCV/vararg.ll index 918b1850b3a3..440f1b4a8547 100644 --- a/llvm/test/CodeGen/RISCV/vararg.ll +++ b/llvm/test/CodeGen/RISCV/vararg.ll @@ -108,8 +108,7 @@ define i32 @va1(i8* %fmt, ...) { ; LP64-LP64F-LP64D-FPELIM-NEXT:sd a4, 48(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT:sd a3, 40(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT:sd a2, 32(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT:addi a0, sp, 24 -; LP64-LP64F-LP64D-FPELIM-NEXT:ori a0, a0, 4 +; LP64-LP64F-LP64D-FPELIM-NEXT:addi a0, sp, 28 ; LP64-LP64F-LP64D-FPELIM-NEXT:sd a0, 8(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT:lw a0, 24(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT:addi sp, sp, 80 @@ -132,8 +131,7 @@ define i32 @va1(i8* %fmt, ...) { ; LP64-LP64F-LP64D-WITHFP-NEXT:sd a4, 32(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT:sd a3, 24(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT:sd a2, 16(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT:addi a0, s0, 8 -; LP64-LP64F-LP64D-WITHFP-NEXT:ori a0, a0, 4 +; LP64-LP64F-LP64D-WITHFP-NEXT:addi a0, s0, 12 ; LP64-LP64F-LP64D-WITHFP-NEXT:sd a0, -24(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT:lw a0, 8(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT:ld s0, 16(sp) # 8-byte Folded Reload @@ -217,8 +215,7 @@ define i32 @va1_va_arg(i8* %fmt, ...) nounwind { ; LP64-LP64F-LP64D-FPELIM-NEXT:sd a3, 40(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT:sd a2, 32(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT:sd a1, 24(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT:addi a1, sp, 24 -; LP64-LP64F-LP64D-FPELIM-NEXT:addi a1, a1, 8 +; LP64-LP64F-LP64D-FPELIM-NEXT:addi a1, sp, 32 ; LP64-LP64F-LP64D-FPELIM-NEXT:sd a1, 8(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT:addi sp, sp, 80 ; LP64-LP64F-LP64D-FPELIM-NEXT:ret @@ -237,8 +234,7 @@ define i32 @va1_va_arg(i8* %fmt, ...) 
nounwind { ; LP64-LP64F-LP64D-WITHFP-NEXT:sd a3, 24(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT:sd a2, 16(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT:sd a1, 8(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT:addi a1, s0, 8 -; LP64-LP64F-LP64D-WITHFP-NEXT:addi a1, a1, 8 +; LP64-LP64F-LP64D-WITHFP-NEXT:addi a1, s0, 16 ; LP64-LP64F-LP64D-WITHFP-NEXT:sd a1, -24(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT:ld s0, 16(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT:ld ra, 24(sp) # 8-byte Folded Reload @@ -360,8 +356,7 @@ define i32 @va1_va_arg_alloca(i8* %fmt, ...) nounwind { ; LP64-LP64F-LP64D-FPELIM-NEXT:sd a3, 24(s0) ; LP64-LP64F-LP64D-FPELIM-NEXT:sd a2, 16(s0) ; LP64-LP64F-LP64D-FPELIM-NEXT:sd a1, 8(s0) -; LP64-LP64F-LP64D-FPELIM-NEXT:addi a0, s0, 8 -; LP64-LP64F-LP64D-FPELIM-NEXT:addi a0, a0, 8 +; LP64-LP64F-LP64D-FPELIM-NEXT:addi a0, s0, 16 ; LP64-LP64F-LP64D-FPELIM-NEXT:sd a0, -32(s0) ; LP64-LP64F-LP64D-FPELIM-NEXT:slli a0, a1, 32 ; LP64-LP64F-LP64D-FPELIM-NEXT:srli a0, a0, 32 @@ -396,8 +391,7 @@ define i32 @va1_va_arg_alloca(i8* %fmt, ...) nounwind { ; LP64-LP64F-LP64D-WITHFP-NEXT:sd a3, 24(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT:sd a2, 16(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT:sd a1, 8(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT:addi a0, s0, 8 -; LP64-LP64F-LP64D-WITHFP-NEXT:addi a0, a0, 8 +; LP64-LP64F-LP64D-WITHFP-NEXT:addi a0, s0, 16 ; LP64-LP64F-LP64D-WITHFP-NEXT:sd a0, -32(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT:slli a0, a1, 32 ; LP64-LP64F-LP64D-WITHFP-NEXT:srli a0, a0, 32 @@ -719,8 +713,7 @@ define i64 @va2_va_arg(i8 *%fmt, ...) nounwind { ; LP64-LP64F-LP6
[llvm-branch-commits] [llvm] b8f22f9 - [NewPM][AMDGPU] Run InternalizePass when -amdgpu-internalize-symbols
Author: Arthur Eubanks Date: 2021-01-04T11:34:40-08:00 New Revision: b8f22f9d3000b13c63a323bcf5230929191f402a URL: https://github.com/llvm/llvm-project/commit/b8f22f9d3000b13c63a323bcf5230929191f402a DIFF: https://github.com/llvm/llvm-project/commit/b8f22f9d3000b13c63a323bcf5230929191f402a.diff LOG: [NewPM][AMDGPU] Run InternalizePass when -amdgpu-internalize-symbols The legacy PM doesn't run EP_ModuleOptimizerEarly on -O0, so skip running it here when given O0. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D93886 Added: Modified: llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp llvm/test/CodeGen/AMDGPU/internalize.ll Removed: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 7a09c91e62d7..6c730be97b9d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -51,6 +51,8 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" +#include "llvm/Transforms/IPO/GlobalDCE.h" +#include "llvm/Transforms/IPO/Internalize.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" @@ -525,6 +527,17 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB, PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); }); + PB.registerPipelineEarlySimplificationEPCallback( + [](ModulePassManager &PM, PassBuilder::OptimizationLevel Level) { +if (Level == PassBuilder::OptimizationLevel::O0) + return; + +if (InternalizeSymbols) { + PM.addPass(InternalizePass(mustPreserveGV)); + PM.addPass(GlobalDCEPass()); +} + }); + PB.registerCGSCCOptimizerLateEPCallback( [this, DebugPassManager](CGSCCPassManager &PM, PassBuilder::OptimizationLevel Level) { diff --git a/llvm/test/CodeGen/AMDGPU/internalize.ll b/llvm/test/CodeGen/AMDGPU/internalize.ll index 138b1c9cc605..8bdd3d450ccf 100644 --- a/llvm/test/CodeGen/AMDGPU/internalize.ll +++ b/llvm/test/CodeGen/AMDGPU/internalize.ll @@ -1,5 +1,7 @@ -; RUN: opt -O1 -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-internalize-symbols < %s | FileCheck -check-prefix=ALL -check-prefix=OPT %s ; RUN: opt -O0 -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-internalize-symbols < %s | FileCheck -check-prefix=ALL -check-prefix=OPTNONE %s +; RUN: opt -passes='default' -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-internalize-symbols < %s | FileCheck -check-prefix=ALL -check-prefix=OPTNONE %s +; RUN: opt -O1 -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-internalize-symbols < %s | FileCheck -check-prefix=ALL -check-prefix=OPT %s +; RUN: opt -passes='default' -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-internalize-symbols < %s | FileCheck -check-prefix=ALL -check-prefix=OPT %s ; OPT-NOT: gvar_unused ; OPTNONE: gvar_unused ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 848e8f9 - [llvm] Construct SmallVector with iterator ranges (NFC)
Author: Kazu Hirata Date: 2021-01-04T11:42:44-08:00 New Revision: 848e8f938fdbefc98a1e079c8a63768cfe9657ab URL: https://github.com/llvm/llvm-project/commit/848e8f938fdbefc98a1e079c8a63768cfe9657ab DIFF: https://github.com/llvm/llvm-project/commit/848e8f938fdbefc98a1e079c8a63768cfe9657ab.diff LOG: [llvm] Construct SmallVector with iterator ranges (NFC) Added: Modified: llvm/include/llvm/Analysis/TargetTransformInfo.h llvm/include/llvm/Analysis/TargetTransformInfoImpl.h llvm/include/llvm/IR/DebugInfoMetadata.h llvm/include/llvm/IR/Metadata.h llvm/include/llvm/IR/PredIteratorCache.h llvm/lib/Analysis/InstructionSimplify.cpp llvm/lib/Analysis/ScalarEvolution.cpp llvm/lib/IR/Constants.cpp llvm/lib/IR/Verifier.cpp llvm/lib/TableGen/Record.cpp Removed: diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index d9d04429b181..ee34312ccf6d 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -332,8 +332,7 @@ class TargetTransformInfo { /// This is a helper function which calls the two-argument getUserCost /// with \p Operands which are the current operands U has. int getUserCost(const User *U, TargetCostKind CostKind) const { -SmallVector Operands(U->value_op_begin(), - U->value_op_end()); +SmallVector Operands(U->operand_values()); return getUserCost(U, Operands, CostKind); } diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index ef0653d0d9f4..47de99b02d97 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -1071,8 +1071,7 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { } int getInstructionLatency(const Instruction *I) { -SmallVector Operands(I->value_op_begin(), - I->value_op_end()); +SmallVector Operands(I->operand_values()); if (getUserCost(I, Operands, TTI::TCK_Latency) == TTI::TCC_Free) return 0; diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h index 94b3beb4effd..20c212ca04e1 100644 --- a/llvm/include/llvm/IR/DebugInfoMetadata.h +++ b/llvm/include/llvm/IR/DebugInfoMetadata.h @@ -240,9 +240,8 @@ class GenericDINode : public DINode { StorageType Storage, bool ShouldCreate = true); TempGenericDINode cloneImpl() const { -return getTemporary( -getContext(), getTag(), getHeader(), -SmallVector(dwarf_op_begin(), dwarf_op_end())); +return getTemporary(getContext(), getTag(), getHeader(), +SmallVector(dwarf_operands())); } public: diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h index edd1d4b3b839..33b92c3c90da 100644 --- a/llvm/include/llvm/IR/Metadata.h +++ b/llvm/include/llvm/IR/Metadata.h @@ -1128,8 +1128,7 @@ class MDTuple : public MDNode { StorageType Storage, bool ShouldCreate = true); TempMDTuple cloneImpl() const { -return getTemporary(getContext(), -SmallVector(op_begin(), op_end())); +return getTemporary(getContext(), SmallVector(operands())); } public: diff --git a/llvm/include/llvm/IR/PredIteratorCache.h b/llvm/include/llvm/IR/PredIteratorCache.h index 4d8efcfa9eab..6bbd7e5e87a0 100644 --- a/llvm/include/llvm/IR/PredIteratorCache.h +++ b/llvm/include/llvm/IR/PredIteratorCache.h @@ -44,7 +44,7 @@ class PredIteratorCache { if (Entry) return Entry; -SmallVector PredCache(pred_begin(BB), pred_end(BB)); +SmallVector PredCache(predecessors(BB)); PredCache.push_back(nullptr); // null terminator. 
BlockToPredCountMap[BB] = PredCache.size() - 1; diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index d89a776b7908..dfaf36a96953 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -3599,7 +3599,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // expression GEP with the same indices and a null base pointer to see // what constant folding can make out of it. Constant *Null = Constant::getNullValue(GLHS->getPointerOperandType()); -SmallVector IndicesLHS(GLHS->idx_begin(), GLHS->idx_end()); +SmallVector IndicesLHS(GLHS->indices()); Constant *NewLHS = ConstantExpr::getGetElementPtr( GLHS->getSourceElementType(), Null, IndicesLHS); @@ -5814,7 +5814,7 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
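A small self-contained comparison of the two construction styles touched by this cleanup (a sketch assuming LLVM's ADT headers, not code from the commit); both vectors end up with the same contents:

#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
#include <iterator>

int main() {
  int Raw[] = {1, 2, 3, 4};
  // Old style: pass an explicit begin/end iterator pair.
  llvm::SmallVector<int, 8> Old(std::begin(Raw), std::end(Raw));
  // New style used by the commit: construct directly from a range.
  llvm::SmallVector<int, 8> New(llvm::make_range(std::begin(Raw), std::end(Raw)));
  return Old == New ? 0 : 1;
}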
[llvm-branch-commits] [llvm] eb198f4 - [llvm] Use llvm::any_of (NFC)
Author: Kazu Hirata Date: 2021-01-04T11:42:47-08:00 New Revision: eb198f4c3cedae4e9778f11e76e0fa1f1b61b622 URL: https://github.com/llvm/llvm-project/commit/eb198f4c3cedae4e9778f11e76e0fa1f1b61b622 DIFF: https://github.com/llvm/llvm-project/commit/eb198f4c3cedae4e9778f11e76e0fa1f1b61b622.diff LOG: [llvm] Use llvm::any_of (NFC) Added: Modified: llvm/include/llvm/CodeGen/LiveInterval.h llvm/lib/CodeGen/MachineSink.cpp llvm/lib/ExecutionEngine/Orc/TargetProcessControl.cpp llvm/lib/Passes/PassBuilder.cpp llvm/lib/Support/SourceMgr.cpp llvm/lib/Target/AArch64/AArch64InstrInfo.cpp llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp llvm/tools/llvm-readobj/ELFDumper.cpp llvm/utils/TableGen/AsmMatcherEmitter.cpp llvm/utils/TableGen/GICombinerEmitter.cpp Removed: diff --git a/llvm/include/llvm/CodeGen/LiveInterval.h b/llvm/include/llvm/CodeGen/LiveInterval.h index 6dc1a3074e22..c2b158ac1b7f 100644 --- a/llvm/include/llvm/CodeGen/LiveInterval.h +++ b/llvm/include/llvm/CodeGen/LiveInterval.h @@ -598,10 +598,9 @@ namespace llvm { /// @p End. bool isUndefIn(ArrayRef Undefs, SlotIndex Begin, SlotIndex End) const { - return std::any_of(Undefs.begin(), Undefs.end(), -[Begin,End] (SlotIndex Idx) -> bool { - return Begin <= Idx && Idx < End; -}); + return llvm::any_of(Undefs, [Begin, End](SlotIndex Idx) -> bool { +return Begin <= Idx && Idx < End; + }); } /// Flush segment set into the regular segment vector. diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 48ed8b0c5e73..42ab961b12e8 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -1024,9 +1024,9 @@ bool MachineSinking::hasStoreBetween(MachineBasicBlock *From, return HasStoreCache[BlockPair]; if (StoreInstrCache.find(BlockPair) != StoreInstrCache.end()) -return std::any_of( -StoreInstrCache[BlockPair].begin(), StoreInstrCache[BlockPair].end(), -[&](MachineInstr *I) { return I->mayAlias(AA, MI, false); }); +return llvm::any_of(StoreInstrCache[BlockPair], [&](MachineInstr *I) { + return I->mayAlias(AA, MI, false); +}); bool SawStore = false; bool HasAliasedStore = false; diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcessControl.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcessControl.cpp index 78fc5a69d46b..c607ce4d91af 100644 --- a/llvm/lib/ExecutionEngine/Orc/TargetProcessControl.cpp +++ b/llvm/lib/ExecutionEngine/Orc/TargetProcessControl.cpp @@ -71,10 +71,10 @@ SelfTargetProcessControl::lookupSymbols( for (auto &Elem : Request) { auto *Dylib = jitTargetAddressToPointer(Elem.Handle); -assert(llvm::find_if(DynamicLibraries, - [=](const std::unique_ptr &DL) { - return DL.get() == Dylib; - }) != DynamicLibraries.end() && +assert(llvm::any_of(DynamicLibraries, +[=](const std::unique_ptr &DL) { + return DL.get() == Dylib; +}) && "Invalid handle"); R.push_back(std::vector()); diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 96d96d43b432..7cebb5985614 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -2657,9 +2657,8 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM, return Err; // Add the nested pass manager with the appropriate adaptor. 
bool UseMemorySSA = (Name == "loop-mssa"); - bool UseBFI = - std::any_of(InnerPipeline.begin(), InnerPipeline.end(), - [](auto Pipeline) { return Pipeline.Name == "licm"; }); + bool UseBFI = llvm::any_of( + InnerPipeline, [](auto Pipeline) { return Pipeline.Name == "licm"; }); FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), UseMemorySSA, UseBFI, DebugLogging)); return Error::success(); diff --git a/llvm/lib/Support/SourceMgr.cpp b/llvm/lib/Support/SourceMgr.cpp index 499d9810677e..89b7dc939dfc 100644 --- a/llvm/lib/Support/SourceMgr.cpp +++ b/llvm/lib/Support/SourceMgr.cpp @@ -522,7 +522,7 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &OS, bool ShowColors, // map like Clang's TextDiagnostic. For now, we'll just handle tabs by // expanding them later, and bail out rather than show incorrect ranges and // misaligned fixits for any other odd characters. - if (find_if(LineContents, isNonASCII) != LineContents.end()) { + if (any_of(LineContents, isNonASCII)) { printSourceLine(OS, LineContents); return; } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp ind
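The same substitution in isolation, as a minimal sketch (not code from the commit): llvm::any_of takes the range directly instead of an iterator pair.

#include "llvm/ADT/STLExtras.h"
#include <algorithm>
#include <vector>

int main() {
  std::vector<int> Values = {3, 5, 8};
  auto IsEven = [](int V) { return V % 2 == 0; };
  // Old style: explicit iterator pair with std::any_of.
  bool Old = std::any_of(Values.begin(), Values.end(), IsEven);
  // New style used by the commit: range-based llvm::any_of.
  bool New = llvm::any_of(Values, IsEven);
  return (Old == New && New) ? 0 : 1;
}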
[llvm-branch-commits] [llvm] 0edbc90 - [DebugInfo] Use llvm::append_range (NFC)
Author: Kazu Hirata Date: 2021-01-04T11:42:45-08:00 New Revision: 0edbc90ec565758f5243b83b1c0a13beaf21214f URL: https://github.com/llvm/llvm-project/commit/0edbc90ec565758f5243b83b1c0a13beaf21214f DIFF: https://github.com/llvm/llvm-project/commit/0edbc90ec565758f5243b83b1c0a13beaf21214f.diff LOG: [DebugInfo] Use llvm::append_range (NFC) Added: Modified: llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h llvm/lib/DebugInfo/DWARF/DWARFDie.cpp llvm/lib/DebugInfo/MSF/MSFBuilder.cpp llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp Removed: diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h b/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h index 59bdd2a7c9f2..3b6d1b0b1a70 100644 --- a/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h +++ b/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h @@ -697,7 +697,7 @@ class VFTableRecord : public TypeRecord { : TypeRecord(TypeRecordKind::VFTable), CompleteClass(CompleteClass), OverriddenVFTable(OverriddenVFTable), VFPtrOffset(VFPtrOffset) { MethodNames.push_back(Name); -MethodNames.insert(MethodNames.end(), Methods.begin(), Methods.end()); +llvm::append_range(MethodNames, Methods); } TypeIndex getCompleteClass() const { return CompleteClass; } diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp index 89b29b428fce..df411bf91041 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -479,8 +479,7 @@ void DWARFDie::collectChildrenAddressRanges( return; if (isSubprogramDIE()) { if (auto DIERangesOrError = getAddressRanges()) - Ranges.insert(Ranges.end(), DIERangesOrError.get().begin(), -DIERangesOrError.get().end()); + llvm::append_range(Ranges, DIERangesOrError.get()); else llvm::consumeError(DIERangesOrError.takeError()); } diff --git a/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp b/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp index c6fe764ab7e0..f946dd4860ac 100644 --- a/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp +++ b/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp @@ -204,8 +204,7 @@ Error MSFBuilder::setStreamSize(uint32_t Idx, uint32_t Size) { if (auto EC = allocateBlocks(AddedBlocks, AddedBlockList)) return EC; auto &CurrentBlocks = StreamData[Idx].second; -CurrentBlocks.insert(CurrentBlocks.end(), AddedBlockList.begin(), - AddedBlockList.end()); +llvm::append_range(CurrentBlocks, AddedBlockList); } else if (OldBlocks > NewBlocks) { // For shrinking, free all the Blocks in the Block map, update the stream // data, then shrink the directory. 
@@ -268,8 +267,7 @@ Expected MSFBuilder::generateLayout() { ExtraBlocks.resize(NumExtraBlocks); if (auto EC = allocateBlocks(NumExtraBlocks, ExtraBlocks)) return std::move(EC); -DirectoryBlocks.insert(DirectoryBlocks.end(), ExtraBlocks.begin(), - ExtraBlocks.end()); +llvm::append_range(DirectoryBlocks, ExtraBlocks); } else if (NumDirectoryBlocks < DirectoryBlocks.size()) { uint32_t NumUnnecessaryBlocks = DirectoryBlocks.size() - NumDirectoryBlocks; for (auto B : diff --git a/llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp b/llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp index 4a88391494cd..1d873b87b347 100644 --- a/llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp @@ -116,7 +116,7 @@ StringMap NamedStreamMap::entries() const { uint32_t NamedStreamMap::appendStringData(StringRef S) { uint32_t Offset = NamesBuffer.size(); - NamesBuffer.insert(NamesBuffer.end(), S.begin(), S.end()); + llvm::append_range(NamesBuffer, S); NamesBuffer.push_back('\0'); return Offset; } diff --git a/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp b/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp index 1296be3c9413..3e734c39e942 100644 --- a/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/SymbolCache.cpp @@ -519,8 +519,7 @@ SymbolCache::findLineTable(uint16_t Modi) const { return LHS[0].Addr < RHS[0].Addr; }); for (size_t I = 0; I < EntryList.size(); ++I) -ModuleLineTable.insert(ModuleLineTable.end(), EntryList[I].begin(), - EntryList[I].end()); +llvm::append_range(ModuleLineTable, EntryList[I]); return ModuleLineTable; } diff --git a/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp index b5e7b03e6917..5f4f497690b6 100644 --- a/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp @@ -91,7 +91,7 @@ void TpiStreamBuilder::addTypeRecords(ArrayRef Types, updat
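llvm::append_range is essentially a wrapper around Container::insert, but call sites read better; a minimal sketch of the before/after (not code from the commit):

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include <string>

int main() {
  llvm::SmallVector<char, 16> NamesBuffer = {'a', 'b'};
  std::string S = "cd";
  // Old style: NamesBuffer.insert(NamesBuffer.end(), S.begin(), S.end());
  // New style used by the commit:
  llvm::append_range(NamesBuffer, S);
  NamesBuffer.push_back('\0');
  return NamesBuffer.size() == 5 ? 0 : 1;
}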
[llvm-branch-commits] [llvm] a5f863e - [NewPM][AMDGPU] Port amdgpu-propagate-attributes-early/late
Author: Arthur Eubanks Date: 2021-01-04T11:53:37-08:00 New Revision: a5f863e0765e9056f302dbf0683f92dad6e8efb9 URL: https://github.com/llvm/llvm-project/commit/a5f863e0765e9056f302dbf0683f92dad6e8efb9 DIFF: https://github.com/llvm/llvm-project/commit/a5f863e0765e9056f302dbf0683f92dad6e8efb9.diff LOG: [NewPM][AMDGPU] Port amdgpu-propagate-attributes-early/late And add to AMDGPU opt pipeline. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D94022 Added: Modified: llvm/lib/Target/AMDGPU/AMDGPU.h llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp llvm/test/CodeGen/AMDGPU/propagate-attributes-clone.ll llvm/test/CodeGen/AMDGPU/propagate-attributes-flat-work-group-size.ll llvm/test/CodeGen/AMDGPU/propagate-attributes-single-set.ll llvm/tools/opt/opt.cpp Removed: diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 623bbb2db325..c06f9ad66009 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -123,9 +123,27 @@ struct AMDGPULowerKernelAttributesPass void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &); extern char &AMDGPUPropagateAttributesEarlyID; +struct AMDGPUPropagateAttributesEarlyPass +: PassInfoMixin { + AMDGPUPropagateAttributesEarlyPass(TargetMachine &TM) : TM(TM) {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + +private: + TargetMachine &TM; +}; + void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &); extern char &AMDGPUPropagateAttributesLateID; +struct AMDGPUPropagateAttributesLatePass +: PassInfoMixin { + AMDGPUPropagateAttributesLatePass(TargetMachine &TM) : TM(TM) {} + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + +private: + TargetMachine &TM; +}; + void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &); extern char &AMDGPURewriteOutArgumentsID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp index dcbe4270e8a9..56512529e7fe 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp @@ -35,6 +35,7 @@ #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/Utils/Cloning.h" #include @@ -409,3 +410,21 @@ ModulePass *llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) { return new AMDGPUPropagateAttributesLate(TM); } + +PreservedAnalyses +AMDGPUPropagateAttributesEarlyPass::run(Function &F, +FunctionAnalysisManager &AM) { + if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) +return PreservedAnalyses::all(); + + return AMDGPUPropagateAttributes(&TM, false).process(F) + ? PreservedAnalyses::none() + : PreservedAnalyses::all(); +} + +PreservedAnalyses +AMDGPUPropagateAttributesLatePass::run(Module &M, ModuleAnalysisManager &AM) { + return AMDGPUPropagateAttributes(&TM, true).process(M) + ? 
PreservedAnalyses::none() + : PreservedAnalyses::all(); +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 6c730be97b9d..765ec045d5f1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -491,6 +491,15 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) { void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB, bool DebugPassManager) { + PB.registerPipelineParsingCallback( + [this](StringRef PassName, ModulePassManager &PM, + ArrayRef) { +if (PassName == "amdgpu-propagate-attributes-late") { + PM.addPass(AMDGPUPropagateAttributesLatePass(*this)); + return true; +} +return false; + }); PB.registerPipelineParsingCallback( [this](StringRef PassName, FunctionPassManager &PM, ArrayRef) { @@ -514,13 +523,19 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB, PM.addPass(AMDGPULowerKernelAttributesPass()); return true; } +if (PassName == "amdgpu-propagate-attributes-early") { + PM.addPass(AMDGPUPropagateAttributesEarlyPass(*this)); + return true; +} + return false; }); - PB.registerPipelineStartEPCallback([DebugPassManager]( + PB.registerPipelineStartEPCallback([this, DebugPassManager]( ModulePassManager &PM, PassBuilder::OptimizationLevel Level) { Functi
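The new-PM boilerplate these ports add is small; a minimal sketch of the shape both passes above follow, with a hypothetical pass name and the TargetMachine carried through the constructor (assuming LLVM headers; nothing below is code from the patch):

#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Target/TargetMachine.h"

// Hypothetical module pass in new-PM style.
struct ExampleTargetAwarePass : llvm::PassInfoMixin<ExampleTargetAwarePass> {
  ExampleTargetAwarePass(llvm::TargetMachine &TM) : TM(TM) {}

  llvm::PreservedAnalyses run(llvm::Module &M, llvm::ModuleAnalysisManager &) {
    bool Changed = false; // inspect TM and transform M here
    return Changed ? llvm::PreservedAnalyses::none()
                   : llvm::PreservedAnalyses::all();
  }

  llvm::TargetMachine &TM;
};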
[llvm-branch-commits] [llvm] e1833e7 - [NewPM][AMDGPU] Port amdgpu-unify-metadata
Author: Arthur Eubanks Date: 2021-01-04T11:57:46-08:00 New Revision: e1833e7493aede34aaf9c1a4480848d60dad7f1d URL: https://github.com/llvm/llvm-project/commit/e1833e7493aede34aaf9c1a4480848d60dad7f1d DIFF: https://github.com/llvm/llvm-project/commit/e1833e7493aede34aaf9c1a4480848d60dad7f1d.diff LOG: [NewPM][AMDGPU] Port amdgpu-unify-metadata And add to AMDGPU opt pipeline. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D94023 Added: Modified: llvm/lib/Target/AMDGPU/AMDGPU.h llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp llvm/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp llvm/test/CodeGen/AMDGPU/unify-metadata.ll llvm/tools/opt/opt.cpp Removed: diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index c06f9ad66009..ac8b0effbdab 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -264,6 +264,10 @@ ModulePass* createAMDGPUUnifyMetadataPass(); void initializeAMDGPUUnifyMetadataPass(PassRegistry&); extern char &AMDGPUUnifyMetadataID; +struct AMDGPUUnifyMetadataPass : PassInfoMixin { + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&); extern char &SIOptimizeExecMaskingPreRAID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 765ec045d5f1..0e3d6df0d6ae 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -498,6 +498,10 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB, PM.addPass(AMDGPUPropagateAttributesLatePass(*this)); return true; } +if (PassName == "amdgpu-unify-metadata") { + PM.addPass(AMDGPUUnifyMetadataPass()); + return true; +} return false; }); PB.registerPipelineParsingCallback( @@ -547,6 +551,8 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB, if (Level == PassBuilder::OptimizationLevel::O0) return; +PM.addPass(AMDGPUUnifyMetadataPass()); + if (InternalizeSymbols) { PM.addPass(InternalizePass(mustPreserveGV)); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp index f898456203a1..f3c9add70357 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp @@ -17,6 +17,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include #include @@ -45,6 +46,7 @@ namespace { private: bool runOnModule(Module &M) override; + }; /// Unify version metadata. /// \return true if changes are made. 
@@ -106,41 +108,42 @@ namespace { return true; } -}; -} // end anonymous namespace + bool unifyMetadataImpl(Module &M) { +const char *Vers[] = {kOCLMD::SpirVer, kOCLMD::OCLVer}; +const char *Exts[] = {kOCLMD::UsedExt, kOCLMD::UsedOptCoreFeat, + kOCLMD::CompilerOptions, kOCLMD::LLVMIdent}; -char AMDGPUUnifyMetadata::ID = 0; +bool Changed = false; -char &llvm::AMDGPUUnifyMetadataID = AMDGPUUnifyMetadata::ID; +for (auto &I : Vers) + Changed |= unifyVersionMD(M, I, true); -INITIALIZE_PASS(AMDGPUUnifyMetadata, "amdgpu-unify-metadata", -"Unify multiple OpenCL metadata due to linking", -false, false) +for (auto &I : Exts) + Changed |= unifyExtensionMD(M, I); -ModulePass* llvm::createAMDGPUUnifyMetadataPass() { - return new AMDGPUUnifyMetadata(); -} +return Changed; + } -bool AMDGPUUnifyMetadata::runOnModule(Module &M) { - const char* Vers[] = { - kOCLMD::SpirVer, - kOCLMD::OCLVer - }; - const char* Exts[] = { - kOCLMD::UsedExt, - kOCLMD::UsedOptCoreFeat, - kOCLMD::CompilerOptions, - kOCLMD::LLVMIdent - }; + } // end anonymous namespace - bool Changed = false; + char AMDGPUUnifyMetadata::ID = 0; - for (auto &I : Vers) -Changed |= unifyVersionMD(M, I, true); + char &llvm::AMDGPUUnifyMetadataID = AMDGPUUnifyMetadata::ID; - for (auto &I : Exts) -Changed |= unifyExtensionMD(M, I); + INITIALIZE_PASS(AMDGPUUnifyMetadata, "amdgpu-unify-metadata", + "Unify multiple OpenCL metadata due to linking", false, false) - return Changed; -} + ModulePass *llvm::createAMDGPUUnifyMetadataPass() { +return new AMDGPUUnifyMetadata(); + } + + bool AMDGPUUnifyMetadata::runOnModule(Module &M) { +return unifyMetadataImpl(M); + } + + PreservedAnalyses AMDGPUUnifyMetadataPass::run(Module &M, + ModuleAnalysisManager &AM) { +return unifyMetadataImpl(M) ? Preserved
[llvm-branch-commits] [llvm] 9a17bff - [LoopNest] Allow empty basic blocks without loops
Author: Whitney Tsang Date: 2021-01-04T19:59:50Z New Revision: 9a17bff4f715a9f3ec89f4eacae8fdea1b74fe79 URL: https://github.com/llvm/llvm-project/commit/9a17bff4f715a9f3ec89f4eacae8fdea1b74fe79 DIFF: https://github.com/llvm/llvm-project/commit/9a17bff4f715a9f3ec89f4eacae8fdea1b74fe79.diff LOG: [LoopNest] Allow empty basic blocks without loops Allow loop nests with empty basic blocks without loops in different levels as perfect. Reviewers: Meinersbur Differential Revision: https://reviews.llvm.org/D93665 Added: Modified: llvm/include/llvm/Analysis/LoopNestAnalysis.h llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h llvm/lib/Analysis/LoopNestAnalysis.cpp llvm/lib/Transforms/Utils/BasicBlockUtils.cpp llvm/test/Analysis/LoopNestAnalysis/perfectnest.ll Removed: diff --git a/llvm/include/llvm/Analysis/LoopNestAnalysis.h b/llvm/include/llvm/Analysis/LoopNestAnalysis.h index 4d77d735819f..692909db8341 100644 --- a/llvm/include/llvm/Analysis/LoopNestAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopNestAnalysis.h @@ -128,6 +128,12 @@ class LoopNest { [](const Loop *L) { return L->isLoopSimplifyForm(); }); } + /// Return true if all loops in the loop nest are in rotated form. + bool areAllLoopsRotatedForm() const { +return std::all_of(Loops.begin(), Loops.end(), + [](const Loop *L) { return L->isRotatedForm(); }); + } + StringRef getName() const { return Loops.front()->getName(); } protected: diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h index 64c569de1f58..fd5a7daf3add 100644 --- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h +++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -244,6 +244,12 @@ unsigned SplitAllCriticalEdges(Function &F, const CriticalEdgeSplittingOptions &Options = CriticalEdgeSplittingOptions()); +/// Recursivelly traverse all empty 'single successor' basic blocks of \p From +/// (if there are any). Return the last basic block found or \p End if it was +/// reached during the search. +const BasicBlock &skipEmptyBlockUntil(const BasicBlock *From, + const BasicBlock *End); + /// Split the edge connecting the specified blocks, and return the newly created /// basic block between \p From and \p To. BasicBlock *SplitEdge(BasicBlock *From, BasicBlock *To, diff --git a/llvm/lib/Analysis/LoopNestAnalysis.cpp b/llvm/lib/Analysis/LoopNestAnalysis.cpp index ef10b7e97461..abc219a8bd32 100644 --- a/llvm/lib/Analysis/LoopNestAnalysis.cpp +++ b/llvm/lib/Analysis/LoopNestAnalysis.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; @@ -253,49 +254,66 @@ static bool checkLoopsStructure(const Loop &OuterLoop, const Loop &InnerLoop, // Ensure the only branch that may exist between the loops is the inner loop // guard. if (OuterLoopHeader != InnerLoopPreHeader) { -const BranchInst *BI = -dyn_cast(OuterLoopHeader->getTerminator()); - -if (!BI || BI != InnerLoop.getLoopGuardBranch()) - return false; - -bool InnerLoopExitContainsLCSSA = ContainsLCSSAPhi(*InnerLoopExit); - -// The successors of the inner loop guard should be the inner loop -// preheader and the outer loop latch. -for (const BasicBlock *Succ : BI->successors()) { - if (Succ == InnerLoopPreHeader) -continue; - if (Succ == OuterLoopLatch) -continue; - - // If `InnerLoopExit` contains LCSSA Phi instructions, additional block - // may be inserted before the `OuterLoopLatch` to which `BI` jumps. 
The - // loops are still considered perfectly nested if the extra block only - // contains Phi instructions from InnerLoopExit and OuterLoopHeader. - if (InnerLoopExitContainsLCSSA && IsExtraPhiBlock(*Succ) && - Succ->getSingleSuccessor() == OuterLoopLatch) { -// Points to the extra block so that we can reference it later in the -// final check. We can also conclude that the inner loop is -// guarded and there exists LCSSA Phi node in the exit block later if we -// see a non-null `ExtraPhiBlock`. -ExtraPhiBlock = Succ; -continue; - } +const BasicBlock &SingleSucc = +skipEmptyBlockUntil(OuterLoopHeader, InnerLoopPreHeader); - DEBUG_WITH_TYPE(VerboseDebug, { -dbgs() << "Inner loop guard successor " << Succ->getName() - << " doesn't lead to inner loop preheader or " - "outer loop latch.\n"; - }); - return false; +// no conditional branch present +if (&SingleSucc != InnerLoopPreH
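A minimal sketch of the traversal the new skipEmptyBlockUntil declaration describes (an approximation, not the committed implementation): starting at From, follow the unique successor of blocks that contain only their terminator until End, a non-trivial block, or a repeated block is reached.

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/BasicBlock.h"

static const llvm::BasicBlock &
skipEmptyBlockUntilSketch(const llvm::BasicBlock *From,
                          const llvm::BasicBlock *End) {
  llvm::SmallPtrSet<const llvm::BasicBlock *, 4> Visited;
  const llvm::BasicBlock *BB = From;
  // A block is "empty" here if it holds nothing but its terminator.
  while (BB != End && BB->size() == 1 && BB->getSingleSuccessor() &&
         Visited.insert(BB).second)
    BB = BB->getSingleSuccessor();
  return *BB;
}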
[llvm-branch-commits] [clang] 4034f92 - Switching Clang UniqueInternalLinkageNamesPass scheduling to using the LLVM one with newpm.
Author: Hongtao Yu Date: 2021-01-04T12:04:46-08:00 New Revision: 4034f9273edacbb1c37acf19139594a226c8bdac URL: https://github.com/llvm/llvm-project/commit/4034f9273edacbb1c37acf19139594a226c8bdac DIFF: https://github.com/llvm/llvm-project/commit/4034f9273edacbb1c37acf19139594a226c8bdac.diff LOG: Switching Clang UniqueInternalLinkageNamesPass scheduling to using the LLVM one with newpm. As a follow-up to D93656, I'm switching the Clang UniqueInternalLinkageNamesPass scheduling to using the LLVM one with newpm. Test Plan: Reviewed By: aeubanks, tmsriram Differential Revision: https://reviews.llvm.org/D94019 Added: Modified: clang/lib/CodeGen/BackendUtil.cpp Removed: diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index b326c643738f..296b111feb2d 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1145,6 +1145,7 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( // non-integrated assemblers don't recognize .cgprofile section. PTO.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS; PTO.Coroutines = LangOpts.Coroutines; + PTO.UniqueLinkageNames = CodeGenOpts.UniqueInternalLinkageNames; PassInstrumentationCallbacks PIC; StandardInstrumentations SI(CodeGenOpts.DebugPassManager); @@ -1326,11 +1327,6 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager( MPM = PB.buildPerModuleDefaultPipeline(Level); } -// Add UniqueInternalLinkageNames Pass which renames internal linkage -// symbols with unique names. -if (CodeGenOpts.UniqueInternalLinkageNames) - MPM.addPass(UniqueInternalLinkageNamesPass()); - if (!CodeGenOpts.MemoryProfileOutput.empty()) { MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass())); MPM.addPass(ModuleMemProfilerPass()); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
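The mechanics of the switch are easy to miss in the diff: the frontend no longer appends UniqueInternalLinkageNamesPass itself, it only sets a flag in PipelineTuningOptions and lets LLVM's default pipeline schedule the pass. A minimal sketch of hypothetical frontend code (not from the patch):

#include "llvm/Passes/PassBuilder.h"

// Build the tuning options a frontend would hand to its PassBuilder.
llvm::PipelineTuningOptions
makeTuningOptions(bool UniqueInternalLinkageNames) {
  llvm::PipelineTuningOptions PTO;
  PTO.UniqueLinkageNames = UniqueInternalLinkageNames;
  return PTO;
}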
[llvm-branch-commits] [llvm] 4d0aad9 - [flang][openmp] Make Reduction clause part of OmpClause
Author: Valentin Clement Date: 2021-01-04T15:19:00-05:00 New Revision: 4d0aad96e431ba78323dd3c7ee9ecd6f5552375d URL: https://github.com/llvm/llvm-project/commit/4d0aad96e431ba78323dd3c7ee9ecd6f5552375d DIFF: https://github.com/llvm/llvm-project/commit/4d0aad96e431ba78323dd3c7ee9ecd6f5552375d.diff LOG: [flang][openmp] Make Reduction clause part of OmpClause After discussion in D93105 we found that the reduction clause was not following the common OmpClause convention. This patch makes reduction clause part of OmpClause with a value of OmpReductionClause in a similar way than task_reduction. The unparse function for OmpReductionClause is adapted since the keyword and parenthesis are issued by the corresponding unparse function for parser::OmpClause::Reduction. Reviewed By: sameeranjoshi Differential Revision: https://reviews.llvm.org/D93482 Added: Modified: flang/lib/Parser/openmp-parsers.cpp flang/lib/Parser/unparse.cpp flang/lib/Semantics/check-omp-structure.cpp flang/lib/Semantics/check-omp-structure.h llvm/include/llvm/Frontend/OpenMP/OMP.td Removed: diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 3a0d28cd9c12..4588a95f2938 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -218,8 +218,8 @@ TYPE_PARSER( parenthesized(Parser{}))) || "PROC_BIND" >> construct(construct( parenthesized(Parser{}))) || -"REDUCTION" >> -construct(parenthesized(Parser{})) || +"REDUCTION" >> construct(construct( + parenthesized(Parser{}))) || "TASK_REDUCTION" >> construct(construct( parenthesized(Parser{}))) || diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index ba54a0a84fa7..2086862551d9 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2013,11 +2013,9 @@ class UnparseVisitor { Put(")"); } void Unparse(const OmpReductionClause &x) { -Word("REDUCTION("); Walk(std::get(x.t)); Put(":"); Walk(std::get(x.t)); -Put(")"); } void Unparse(const OmpAllocateClause &x) { Walk(std::get>(x.t)); diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index a144c7a2b57b..1a7e1dfdbfd2 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -419,6 +419,7 @@ CHECK_SIMPLE_CLAUSE(Mergeable, OMPC_mergeable) CHECK_SIMPLE_CLAUSE(Nogroup, OMPC_nogroup) CHECK_SIMPLE_CLAUSE(Notinbranch, OMPC_notinbranch) CHECK_SIMPLE_CLAUSE(Nowait, OMPC_nowait) +CHECK_SIMPLE_CLAUSE(Reduction, OMPC_reduction) CHECK_SIMPLE_CLAUSE(TaskReduction, OMPC_task_reduction) CHECK_SIMPLE_CLAUSE(To, OMPC_to) CHECK_SIMPLE_CLAUSE(Uniform, OMPC_uniform) @@ -495,7 +496,6 @@ void OmpStructureChecker::CheckIsVarPartOfAnotherVar( } } // Following clauses have a seperate node in parse-tree.h. 
-CHECK_SIMPLE_PARSER_CLAUSE(OmpReductionClause, OMPC_reduction) // Atomic-clause CHECK_SIMPLE_PARSER_CLAUSE(OmpAtomicRead, OMPC_read) CHECK_SIMPLE_PARSER_CLAUSE(OmpAtomicWrite, OMPC_write) diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h index ccd0e08a8c08..7a96db3ec603 100644 --- a/flang/lib/Semantics/check-omp-structure.h +++ b/flang/lib/Semantics/check-omp-structure.h @@ -152,6 +152,7 @@ class OmpStructureChecker void Enter(const parser::OmpClause::Priority &); void Enter(const parser::OmpClause::Private &); void Enter(const parser::OmpClause::ProcBind &); + void Enter(const parser::OmpClause::Reduction &); void Enter(const parser::OmpClause::Safelen &); void Enter(const parser::OmpClause::Shared &); void Enter(const parser::OmpClause::Simdlen &); @@ -184,7 +185,6 @@ class OmpStructureChecker void Enter(const parser::OmpIfClause &); void Enter(const parser::OmpLinearClause &); void Enter(const parser::OmpMapClause &); - void Enter(const parser::OmpReductionClause &); void Enter(const parser::OmpScheduleClause &); private: diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 9fd14cb03a47..b5b5bb238bae 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -82,7 +82,7 @@ def OMPC_Shared : Clause<"shared"> { } def OMPC_Reduction : Clause<"reduction"> { let clangClass = "OMPReductionClause"; - let flangClass = "OmpReductionClause"; + let flangClassValue = "OmpReductionClause"; } def OMPC_Linear : Clause<"linear"> { let clangClass = "OMPLinearClause"; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] fe597ef - [RISCV] Remove unused method RISCVInstPrinter::printSImm5Plus1. NFC
Author: Craig Topper Date: 2021-01-04T12:21:35-08:00 New Revision: fe597efc30b22bac5b49ffb64e52300d661c7d78 URL: https://github.com/llvm/llvm-project/commit/fe597efc30b22bac5b49ffb64e52300d661c7d78 DIFF: https://github.com/llvm/llvm-project/commit/fe597efc30b22bac5b49ffb64e52300d661c7d78.diff LOG: [RISCV] Remove unused method RISCVInstPrinter::printSImm5Plus1. NFC simm5_plus1 is only used by InstAliases so should never be printed. Added: Modified: llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h llvm/lib/Target/RISCV/RISCVInstrInfoV.td Removed: diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp index 7e4590456621..65edd3b4d4f6 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp @@ -187,15 +187,6 @@ void RISCVInstPrinter::printVMaskReg(const MCInst *MI, unsigned OpNo, O << ".t"; } -void RISCVInstPrinter::printSImm5Plus1(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, - raw_ostream &O) { - const MCOperand &MO = MI->getOperand(OpNo); - - assert(MO.isImm() && "printSImm5Plus1 can only print constant operands"); - O << MO.getImm() + 1; -} - const char *RISCVInstPrinter::getRegisterName(unsigned RegNo) { return getRegisterName(RegNo, ArchRegNames ? RISCV::NoRegAltName : RISCV::ABIRegAltName); diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h index ddffcbe343d7..d078ead2c8ad 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h @@ -46,8 +46,6 @@ class RISCVInstPrinter : public MCInstPrinter { raw_ostream &O); void printVMaskReg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); - void printSImm5Plus1(const MCInst *MI, unsigned OpNo, - const MCSubtargetInfo &STI, raw_ostream &O); // Autogenerated by tblgen. std::pair getMnemonic(const MCInst *MI) override; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index b015ae0d2dba..edcde5fbbb39 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -69,7 +69,6 @@ def SImm5Plus1AsmOperand : AsmOperandClass { def simm5_plus1 : Operand, ImmLeaf(Imm - 1);}]> { let ParserMatchClass = SImm5Plus1AsmOperand; - let PrintMethod = "printSImm5Plus1"; let MCOperandPredicate = [{ int64_t Imm; if (MCOp.evaluateAsConstantImm(Imm)) ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] fd323a8 - [NewPM][AMDGPU] Port amdgpu-printf-runtime-binding
Author: Arthur Eubanks Date: 2021-01-04T12:25:50-08:00 New Revision: fd323a897c666b847e8818f63331dfcd1842953e URL: https://github.com/llvm/llvm-project/commit/fd323a897c666b847e8818f63331dfcd1842953e DIFF: https://github.com/llvm/llvm-project/commit/fd323a897c666b847e8818f63331dfcd1842953e.diff LOG: [NewPM][AMDGPU] Port amdgpu-printf-runtime-binding And add to AMDGPU opt pipeline. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D94026 Added: Modified: llvm/lib/Target/AMDGPU/AMDGPU.h llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp llvm/test/CodeGen/AMDGPU/opencl-printf.ll llvm/tools/opt/opt.cpp Removed: diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index ac8b0effbdab..ea2755d4b6ed 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -260,6 +260,11 @@ ModulePass *createAMDGPUPrintfRuntimeBinding(); void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&); extern char &AMDGPUPrintfRuntimeBindingID; +struct AMDGPUPrintfRuntimeBindingPass +: PassInfoMixin { + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + ModulePass* createAMDGPUUnifyMetadataPass(); void initializeAMDGPUUnifyMetadataPass(PassRegistry&); extern char &AMDGPUUnifyMetadataID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp index 31c6c0bb0c2f..80a7acb63783 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp @@ -32,10 +32,12 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; @@ -44,8 +46,7 @@ using namespace llvm; #define DWORD_ALIGN 4 namespace { -class LLVM_LIBRARY_VISIBILITY AMDGPUPrintfRuntimeBinding final -: public ModulePass { +class AMDGPUPrintfRuntimeBinding final : public ModulePass { public: static char ID; @@ -54,25 +55,36 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUPrintfRuntimeBinding final private: bool runOnModule(Module &M) override; - void getConversionSpecifiers(SmallVectorImpl &OpConvSpecifiers, - StringRef fmt, size_t num_ops) const; - - bool shouldPrintAsStr(char Specifier, Type *OpType) const; - bool - lowerPrintfForGpu(Module &M, -function_ref GetTLI); void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); } +}; + +class AMDGPUPrintfRuntimeBindingImpl { +public: + AMDGPUPrintfRuntimeBindingImpl( + function_ref GetDT, + function_ref GetTLI) + : GetDT(GetDT), GetTLI(GetTLI) {} + bool run(Module &M); + +private: + void getConversionSpecifiers(SmallVectorImpl &OpConvSpecifiers, + StringRef fmt, size_t num_ops) const; - Value *simplify(Instruction *I, const TargetLibraryInfo *TLI) { + bool shouldPrintAsStr(char Specifier, Type *OpType) const; + bool lowerPrintfForGpu(Module &M); + + Value *simplify(Instruction *I, const TargetLibraryInfo *TLI, + const DominatorTree *DT) { return SimplifyInstruction(I, {*TD, TLI, DT}); } const DataLayout *TD; - const DominatorTree *DT; + function_ref GetDT; + function_ref GetTLI; SmallVector Printfs; }; } // namespace @@ -95,12 +107,11 @@ ModulePass *createAMDGPUPrintfRuntimeBinding() { } 
} // namespace llvm -AMDGPUPrintfRuntimeBinding::AMDGPUPrintfRuntimeBinding() -: ModulePass(ID), TD(nullptr), DT(nullptr) { +AMDGPUPrintfRuntimeBinding::AMDGPUPrintfRuntimeBinding() : ModulePass(ID) { initializeAMDGPUPrintfRuntimeBindingPass(*PassRegistry::getPassRegistry()); } -void AMDGPUPrintfRuntimeBinding::getConversionSpecifiers( +void AMDGPUPrintfRuntimeBindingImpl::getConversionSpecifiers( SmallVectorImpl &OpConvSpecifiers, StringRef Fmt, size_t NumOps) const { // not all format characters are collected. @@ -132,8 +143,8 @@ void AMDGPUPrintfRuntimeBinding::getConversionSpecifiers( } } -bool AMDGPUPrintfRuntimeBinding::shouldPrintAsStr(char Specifier, - Type *OpType) const { +bool AMDGPUPrintfRuntimeBindingImpl::shouldPrintAsStr(char Specifier, + Type *OpType) const { if (Specifier != 's') return false; const PointerType *PT = dyn_cast(OpType); @@ -146,8 +157,7 @@ bool AMDGPUPrintfRuntimeBinding::shouldPrintAsStr(char Specifier, retur
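The recurring refactoring in these ports is worth spelling out once: the transformation logic moves into a plain Impl class, and thin wrappers adapt it to the legacy and the new pass manager. A minimal sketch with hypothetical names (not code from the patch):

#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"

// Shared logic, pass-manager agnostic.
class ExampleLoweringImpl {
public:
  bool run(llvm::Module &M) { return false; /* transform M here */ }
};

// Legacy-PM wrapper.
class ExampleLoweringLegacy : public llvm::ModulePass {
public:
  static char ID;
  ExampleLoweringLegacy() : llvm::ModulePass(ID) {}
  bool runOnModule(llvm::Module &M) override {
    return ExampleLoweringImpl().run(M);
  }
};
char ExampleLoweringLegacy::ID = 0;

// New-PM wrapper.
struct ExampleLoweringPass : llvm::PassInfoMixin<ExampleLoweringPass> {
  llvm::PreservedAnalyses run(llvm::Module &M, llvm::ModuleAnalysisManager &) {
    return ExampleLoweringImpl().run(M) ? llvm::PreservedAnalyses::none()
                                        : llvm::PreservedAnalyses::all();
  }
};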
[llvm-branch-commits] [llvm] 4e838ba - [NewPM][AMDGPU] Port amdgpu-always-inline
Author: Arthur Eubanks Date: 2021-01-04T12:27:01-08:00 New Revision: 4e838ba9ea2cc7effbb051fdacf74a738b35eb6a URL: https://github.com/llvm/llvm-project/commit/4e838ba9ea2cc7effbb051fdacf74a738b35eb6a DIFF: https://github.com/llvm/llvm-project/commit/4e838ba9ea2cc7effbb051fdacf74a738b35eb6a.diff LOG: [NewPM][AMDGPU] Port amdgpu-always-inline And add to AMDGPU opt pipeline. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D94025 Added: Modified: llvm/lib/Target/AMDGPU/AMDGPU.h llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll llvm/tools/opt/opt.cpp Removed: diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index ea2755d4b6ed..503f1022bdae 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -253,6 +253,15 @@ FunctionPass *createAMDGPUISelDag( TargetMachine *TM = nullptr, CodeGenOpt::Level OptLevel = CodeGenOpt::Default); ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true); + +struct AMDGPUAlwaysInlinePass : PassInfoMixin { + AMDGPUAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {} + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + +private: + bool GlobalOpt; +}; + ModulePass *createR600OpenCLImageTypeLoweringPass(); FunctionPass *createAMDGPUAnnotateUniformValues(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp index 22947544ac07..aefc05b81f95 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp @@ -17,6 +17,7 @@ #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" #include "llvm/Transforms/Utils/Cloning.h" using namespace llvm; @@ -32,8 +33,6 @@ static cl::opt StressCalls( class AMDGPUAlwaysInline : public ModulePass { bool GlobalOpt; - void recursivelyVisitUsers(GlobalValue &GV, - SmallPtrSetImpl &FuncsToAlwaysInline); public: static char ID; @@ -53,9 +52,9 @@ INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline", char AMDGPUAlwaysInline::ID = 0; -void AMDGPUAlwaysInline::recursivelyVisitUsers( - GlobalValue &GV, - SmallPtrSetImpl &FuncsToAlwaysInline) { +static void +recursivelyVisitUsers(GlobalValue &GV, + SmallPtrSetImpl &FuncsToAlwaysInline) { SmallVector Stack; SmallPtrSet Visited; @@ -91,7 +90,7 @@ void AMDGPUAlwaysInline::recursivelyVisitUsers( } } -bool AMDGPUAlwaysInline::runOnModule(Module &M) { +static bool alwaysInlineImpl(Module &M, bool GlobalOpt) { std::vector AliasesToRemove; SmallPtrSet FuncsToAlwaysInline; @@ -157,7 +156,16 @@ bool AMDGPUAlwaysInline::runOnModule(Module &M) { return !FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty(); } +bool AMDGPUAlwaysInline::runOnModule(Module &M) { + return alwaysInlineImpl(M, GlobalOpt); +} + ModulePass *llvm::createAMDGPUAlwaysInlinePass(bool GlobalOpt) { return new AMDGPUAlwaysInline(GlobalOpt); } +PreservedAnalyses AMDGPUAlwaysInlinePass::run(Module &M, + ModuleAnalysisManager &AM) { + alwaysInlineImpl(M, GlobalOpt); + return PreservedAnalyses::all(); +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index fb50662a3f77..d3bea7f9469e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -506,6 +506,10 @@ void 
AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB, PM.addPass(AMDGPUPrintfRuntimeBindingPass()); return true; } +if (PassName == "amdgpu-always-inline") { + PM.addPass(AMDGPUAlwaysInlinePass()); + return true; +} return false; }); PB.registerPipelineParsingCallback( @@ -565,6 +569,8 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB, if (InternalizeSymbols) { PM.addPass(GlobalDCEPass()); } +if (EarlyInlineAll && !EnableFunctionCalls) + PM.addPass(AMDGPUAlwaysInlinePass()); }); PB.registerCGSCCOptimizerLateEPCallback( diff --git a/llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll b/llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll index 8ab59dc2224c..91ee7ff29b64 100644 --- a/llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll +++ b/llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll @@ -1,5 +1,7 @@ ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-always-inline %s | FileCheck -check-prefixes=CALLS-ENABLED,ALL %s +; RUN: opt -S -m
[llvm-branch-commits] [llvm] 1915523 - [NewPM][AMDGPU] Make amdgpu-aa work with NewPM
Author: Arthur Eubanks Date: 2021-01-04T12:36:27-08:00 New Revision: 191552344bba04c428de4a34b83d6f7537a4a596 URL: https://github.com/llvm/llvm-project/commit/191552344bba04c428de4a34b83d6f7537a4a596 DIFF: https://github.com/llvm/llvm-project/commit/191552344bba04c428de4a34b83d6f7537a4a596.diff LOG: [NewPM][AMDGPU] Make amdgpu-aa work with NewPM An AMDGPUAA class already existed that was supposed to work with the new PM, but it wasn't tested and was a bit broken. Fix up the existing classes to have the right keys/parameters. Wire up AMDGPUAA inside AMDGPUTargetMachine. Add it to the list of alias analyses for the "default" AAManager since in adjustPassManager() amdgpu-aa is added into the pipeline at the beginning. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D93914 Added: Modified: llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll Removed: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp index 9059f2886ea2f..d4f97b2ed22dd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp @@ -32,6 +32,8 @@ using namespace llvm; #define DEBUG_TYPE "amdgpu-aa" +AnalysisKey AMDGPUAA::Key; + // Register this pass... char AMDGPUAAWrapperPass::ID = 0; char AMDGPUExternalAAWrapper::ID = 0; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h index fd8889ea5c0dd..f3e64d8b9d9a8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h @@ -42,7 +42,10 @@ class AMDGPUAAResult : public AAResultBase { /// Handle invalidation events from the new pass manager. /// /// By definition, this result is stateless and so remains valid. 
- bool invalidate(Function &, const PreservedAnalyses &) { return false; } + bool invalidate(Function &, const PreservedAnalyses &, + FunctionAnalysisManager::Invalidator &Inv) { +return false; + } AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB, AAQueryInfo &AAQI); @@ -54,7 +57,7 @@ class AMDGPUAAResult : public AAResultBase { class AMDGPUAA : public AnalysisInfoMixin { friend AnalysisInfoMixin; - static char PassID; + static AnalysisKey Key; public: using Result = AMDGPUAAResult; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index d3bea7f9469ef..7e9a0ddc0fc6b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -489,6 +489,10 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) { }); } +void AMDGPUTargetMachine::registerAliasAnalyses(AAManager &AAM) { + AAM.registerFunctionAnalysis(); +} + void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB, bool DebugPassManager) { PB.registerPipelineParsingCallback( @@ -543,6 +547,18 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB, return false; }); + PB.registerAnalysisRegistrationCallback([](FunctionAnalysisManager &FAM) { +FAM.registerPass([&] { return AMDGPUAA(); }); + }); + + PB.registerParseAACallback([](StringRef AAName, AAManager &AAM) { +if (AAName == "amdgpu-aa") { + AAM.registerFunctionAnalysis(); + return true; +} +return false; + }); + PB.registerPipelineStartEPCallback([this, DebugPassManager]( ModulePassManager &PM, PassBuilder::OptimizationLevel Level) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h index d5fd769912d02..abcc38ff73ba9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -58,6 +58,7 @@ class AMDGPUTargetMachine : public LLVMTargetMachine { void registerPassBuilderCallbacks(PassBuilder &PB, bool DebugPassManager) override; + void registerAliasAnalyses(AAManager &) override; /// Get the integer value of a null pointer in the given address space. static int64_t getNullPointerValue(unsigned AddrSpace) { diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll index 7302361bfb68e..778e9613136d1 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll @@ -1,5 +1,7 @@ ; RUN: opt -mtriple=amdgcn-- -data-layou
[llvm-branch-commits] [llvm] de6d43f - Revert "[LoopNest] Allow empty basic blocks without loops"
Author: Whitney Tsang Date: 2021-01-04T20:42:21Z New Revision: de6d43f16cbaf2eae6fa161ea6e811b8f5f45174 URL: https://github.com/llvm/llvm-project/commit/de6d43f16cbaf2eae6fa161ea6e811b8f5f45174 DIFF: https://github.com/llvm/llvm-project/commit/de6d43f16cbaf2eae6fa161ea6e811b8f5f45174.diff LOG: Revert "[LoopNest] Allow empty basic blocks without loops" This reverts commit 9a17bff4f715a9f3ec89f4eacae8fdea1b74fe79. Added: Modified: llvm/include/llvm/Analysis/LoopNestAnalysis.h llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h llvm/lib/Analysis/LoopNestAnalysis.cpp llvm/lib/Transforms/Utils/BasicBlockUtils.cpp llvm/test/Analysis/LoopNestAnalysis/perfectnest.ll Removed: diff --git a/llvm/include/llvm/Analysis/LoopNestAnalysis.h b/llvm/include/llvm/Analysis/LoopNestAnalysis.h index 692909db8341..4d77d735819f 100644 --- a/llvm/include/llvm/Analysis/LoopNestAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopNestAnalysis.h @@ -128,12 +128,6 @@ class LoopNest { [](const Loop *L) { return L->isLoopSimplifyForm(); }); } - /// Return true if all loops in the loop nest are in rotated form. - bool areAllLoopsRotatedForm() const { -return std::all_of(Loops.begin(), Loops.end(), - [](const Loop *L) { return L->isRotatedForm(); }); - } - StringRef getName() const { return Loops.front()->getName(); } protected: diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h index fd5a7daf3add..64c569de1f58 100644 --- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h +++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -244,12 +244,6 @@ unsigned SplitAllCriticalEdges(Function &F, const CriticalEdgeSplittingOptions &Options = CriticalEdgeSplittingOptions()); -/// Recursivelly traverse all empty 'single successor' basic blocks of \p From -/// (if there are any). Return the last basic block found or \p End if it was -/// reached during the search. -const BasicBlock &skipEmptyBlockUntil(const BasicBlock *From, - const BasicBlock *End); - /// Split the edge connecting the specified blocks, and return the newly created /// basic block between \p From and \p To. BasicBlock *SplitEdge(BasicBlock *From, BasicBlock *To, diff --git a/llvm/lib/Analysis/LoopNestAnalysis.cpp b/llvm/lib/Analysis/LoopNestAnalysis.cpp index abc219a8bd32..ef10b7e97461 100644 --- a/llvm/lib/Analysis/LoopNestAnalysis.cpp +++ b/llvm/lib/Analysis/LoopNestAnalysis.cpp @@ -16,7 +16,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; @@ -254,66 +253,49 @@ static bool checkLoopsStructure(const Loop &OuterLoop, const Loop &InnerLoop, // Ensure the only branch that may exist between the loops is the inner loop // guard. if (OuterLoopHeader != InnerLoopPreHeader) { -const BasicBlock &SingleSucc = -skipEmptyBlockUntil(OuterLoopHeader, InnerLoopPreHeader); - -// no conditional branch present -if (&SingleSucc != InnerLoopPreHeader) { - const BranchInst *BI = dyn_cast(SingleSucc.getTerminator()); - - if (!BI || BI != InnerLoop.getLoopGuardBranch()) -return false; - - bool InnerLoopExitContainsLCSSA = ContainsLCSSAPhi(*InnerLoopExit); - - // The successors of the inner loop guard should be the inner loop - // preheader or the outer loop latch possibly through empty blocks. 
- for (const BasicBlock *Succ : BI->successors()) { -const BasicBlock *PotentialInnerPreHeader = Succ; -const BasicBlock *PotentialOuterLatch = Succ; - -// Ensure the inner loop guard successor is empty before skipping -// blocks. -if (Succ->getInstList().size() == 1) { - PotentialInnerPreHeader = - &skipEmptyBlockUntil(Succ, InnerLoopPreHeader); - PotentialOuterLatch = &skipEmptyBlockUntil(Succ, OuterLoopLatch); -} - -if (PotentialInnerPreHeader == InnerLoopPreHeader) - continue; -if (PotentialOuterLatch == OuterLoopLatch) - continue; - -// If `InnerLoopExit` contains LCSSA Phi instructions, additional block -// may be inserted before the `OuterLoopLatch` to which `BI` jumps. The -// loops are still considered perfectly nested if the extra block only -// contains Phi instructions from InnerLoopExit and OuterLoopHeader. -if (InnerLoopExitContainsLCSSA && IsExtraPhiBlock(*Succ) && -Succ->getSingleSuccessor() == OuterLoopLatch) { - // Points to the extra block so that we can reference it later in the - // final check. We can also conclude that
[llvm-branch-commits] [llvm] 92be640 - [FPEnv][AMDGPU] Disable FSUB(-0, X)->FNEG(X) DAGCombine when subnormals are flushed
Author: Cameron McInally Date: 2021-01-04T14:44:10-06:00 New Revision: 92be640bd7d4fbc8e032a0aa81381a0246efa0be URL: https://github.com/llvm/llvm-project/commit/92be640bd7d4fbc8e032a0aa81381a0246efa0be DIFF: https://github.com/llvm/llvm-project/commit/92be640bd7d4fbc8e032a0aa81381a0246efa0be.diff LOG: [FPEnv][AMDGPU] Disable FSUB(-0,X)->FNEG(X) DAGCombine when subnormals are flushed This patch disables the FSUB(-0,X)->FNEG(X) DAG combine when we're flushing subnormals. It requires updating the existing AMDGPU tests to use the fneg IR instruction, in place of the old fsub(-0,X) canonical form, since AMDGPU is the only backend currently checking the DenormalMode flags. Note that this will require follow-up optimizations to make sure the FSUB(-0,X) form is handled appropriately Differential Revision: https://reviews.llvm.org/D93243 Added: Modified: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp llvm/test/CodeGen/AMDGPU/clamp-modifier.ll llvm/test/CodeGen/AMDGPU/clamp.ll llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll llvm/test/CodeGen/AMDGPU/fma-combine.ll llvm/test/CodeGen/AMDGPU/fneg-combines.ll llvm/test/CodeGen/AMDGPU/fpext-free.ll llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll llvm/test/CodeGen/AMDGPU/known-never-snan.ll llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.ll llvm/test/CodeGen/AMDGPU/mad-combine.ll llvm/test/CodeGen/AMDGPU/mad-mix.ll llvm/test/CodeGen/AMDGPU/rcp-pattern.ll llvm/test/CodeGen/AMDGPU/rsq.ll llvm/test/CodeGen/AMDGPU/v_mac.ll llvm/test/CodeGen/AMDGPU/v_mac_f16.ll Removed: diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 92b23df9e3af..6b1bd721a993 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -13367,18 +13367,21 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } // (fsub -0.0, N1) -> -N1 - // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the - // FSUB does not specify the sign bit of a NaN. Also note that for - // the same reason, the inverse transform is not safe, unless fast math - // flags are in play. if (N0CFP && N0CFP->isZero()) { if (N0CFP->isNegative() || (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) { - if (SDValue NegN1 = - TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize)) -return NegN1; - if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) -return DAG.getNode(ISD::FNEG, DL, VT, N1); + // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are + // flushed to zero, unless all users treat denorms as zero (DAZ). + // FIXME: This transform will change the sign of a NaN and the behavior + // of a signaling NaN. It is only valid when a NoNaN flag is present. 
+ DenormalMode DenormMode = DAG.getDenormalMode(VT); + if (DenormMode == DenormalMode::getIEEE()) { +if (SDValue NegN1 = +TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize)) + return NegN1; +if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) + return DAG.getNode(ISD::FNEG, DL, VT, N1); + } } } diff --git a/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll b/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll index 5a56a1a264af..4f3d6442da44 100644 --- a/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll +++ b/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll @@ -62,7 +62,7 @@ define amdgpu_kernel void @v_clamp_add_neg_src_f32(float addrspace(1)* %out, flo %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid %a = load float, float addrspace(1)* %gep0 %floor = call float @llvm.floor.f32(float %a) - %neg.floor = fsub float -0.0, %floor + %neg.floor = fneg float %floor %max = call float @llvm.maxnum.f32(float %neg.floor, float 0.0) %clamp = call float @llvm.minnum.f32(float %max, float 1.0) store float %clamp, float addrspace(1)* %out.gep diff --git a/llvm/test/CodeGen/AMDGPU/clamp.ll b/llvm/test/CodeGen/AMDGPU/clamp.ll index 1e18b2fa1c1b..256bea7fb7fb 100644 --- a/llvm/test/CodeGen/AMDGPU/clamp.ll +++ b/llvm/test/CodeGen/AMDGPU/clamp.ll @@ -25,7 +25,7 @@ define amdgpu_kernel void @v_clamp_neg_f32(float addrspace(1)* %out, float addrs %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid %a = load float, float addrspace(1)* %gep0 - %fneg.a = fsub float -0.0, %a + %fneg.a = fneg float %a %max = call float @llvm.maxnum.f32(float %fneg.a, float 0.0) %med = call float @llvm.minnum.f32(float %max, float 1.0) @@ -42,7 +42,7 @@ define amdgpu_kernel void @v_clamp_negabs_f32(float addrspace(1)* %out, float ad %out.gep = getelementptr float, float addrspace(1)*
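To see why the combine is unsafe once subnormal inputs are flushed, consider a positive subnormal x: a DAZ-style unit evaluates fsub(-0.0, x) as -0.0 - (+0.0) = -0.0, while fneg(x) is a plain sign-bit flip that yields a nonzero negative subnormal. Below is a minimal standalone sketch of that difference in plain C++; it models DAZ by flushing the input explicitly, it is not LLVM code, and flushInput/fsubNegZeroDAZ/fnegModel are made-up names for illustration.

// toy_fsub_fneg.cpp - illustrative model only, not LLVM code.
#include <cmath>
#include <cstdio>

// Model a DAZ ("denormals are zero") FPU: subnormal inputs are read as +/-0.
static float flushInput(float X) {
  return std::fpclassify(X) == FP_SUBNORMAL ? std::copysign(0.0f, X) : X;
}

// fsub(-0.0, X) as a DAZ unit would execute it: X is flushed before the sub.
static float fsubNegZeroDAZ(float X) { return -0.0f - flushInput(X); }

// fneg(X): a pure sign-bit flip, independent of the denormal mode.
static float fnegModel(float X) { return -X; }

int main() {
  float Subnormal = 1e-45f; // rounds to the smallest positive subnormal
  std::printf("fsub(-0,x) under DAZ: %g\n", fsubNegZeroDAZ(Subnormal)); // prints -0
  std::printf("fneg(x):              %g\n", fnegModel(Subnormal));      // prints a negative subnormal
  // The two results differ for subnormal x, which is why the DAGCombine is
  // now restricted to targets whose DenormalMode is plain IEEE.
  return 0;
}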
[llvm-branch-commits] [llvm] 9766957 - [LoopUtils] reduce code for creating reduction; NFC
Author: Sanjay Patel Date: 2021-01-04T16:05:03-05:00 New Revision: 976695752416f6ff51993ec1f3769e8a62eea2f2 URL: https://github.com/llvm/llvm-project/commit/976695752416f6ff51993ec1f3769e8a62eea2f2 DIFF: https://github.com/llvm/llvm-project/commit/976695752416f6ff51993ec1f3769e8a62eea2f2.diff LOG: [LoopUtils] reduce code for creatng reduction; NFC We can return from each case instead creating a temporary variable just to have a common return. Added: Modified: llvm/lib/Transforms/Utils/LoopUtils.cpp Removed: diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index e062eacf82b2..3245f5f21017 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -983,77 +983,53 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, RecurKind RdxKind, ArrayRef RedOps) { TargetTransformInfo::ReductionFlags RdxFlags; - RdxFlags.IsMaxOp = RdxKind == RecurKind::SMax || - RdxKind == RecurKind::UMax || + RdxFlags.IsMaxOp = RdxKind == RecurKind::SMax || RdxKind == RecurKind::UMax || RdxKind == RecurKind::FMax; RdxFlags.IsSigned = RdxKind == RecurKind::SMax || RdxKind == RecurKind::SMin; if (!ForceReductionIntrinsic && !TTI->useReductionIntrinsic(Opcode, Src->getType(), RdxFlags)) return getShuffleReduction(Builder, Src, Opcode, RdxKind, RedOps); - auto *SrcVTy = cast(Src->getType()); - - std::function BuildFunc; + auto *SrcVecEltTy = cast(Src->getType())->getElementType(); switch (Opcode) { case Instruction::Add: -BuildFunc = [&]() { return Builder.CreateAddReduce(Src); }; -break; +return Builder.CreateAddReduce(Src); case Instruction::Mul: -BuildFunc = [&]() { return Builder.CreateMulReduce(Src); }; -break; +return Builder.CreateMulReduce(Src); case Instruction::And: -BuildFunc = [&]() { return Builder.CreateAndReduce(Src); }; -break; +return Builder.CreateAndReduce(Src); case Instruction::Or: -BuildFunc = [&]() { return Builder.CreateOrReduce(Src); }; -break; +return Builder.CreateOrReduce(Src); case Instruction::Xor: -BuildFunc = [&]() { return Builder.CreateXorReduce(Src); }; -break; +return Builder.CreateXorReduce(Src); case Instruction::FAdd: -BuildFunc = [&]() { - auto Rdx = Builder.CreateFAddReduce( - ConstantFP::getNegativeZero(SrcVTy->getElementType()), Src); - return Rdx; -}; -break; +return Builder.CreateFAddReduce(ConstantFP::getNegativeZero(SrcVecEltTy), +Src); case Instruction::FMul: -BuildFunc = [&]() { - Type *Ty = SrcVTy->getElementType(); - auto Rdx = Builder.CreateFMulReduce(ConstantFP::get(Ty, 1.0), Src); - return Rdx; -}; -break; +return Builder.CreateFMulReduce(ConstantFP::get(SrcVecEltTy, 1.0), Src); case Instruction::ICmp: switch (RdxKind) { case RecurKind::SMax: - BuildFunc = [&]() { return Builder.CreateIntMaxReduce(Src, true); }; - break; + return Builder.CreateIntMaxReduce(Src, true); case RecurKind::SMin: - BuildFunc = [&]() { return Builder.CreateIntMinReduce(Src, true); }; - break; + return Builder.CreateIntMinReduce(Src, true); case RecurKind::UMax: - BuildFunc = [&]() { return Builder.CreateIntMaxReduce(Src, false); }; - break; + return Builder.CreateIntMaxReduce(Src, false); case RecurKind::UMin: - BuildFunc = [&]() { return Builder.CreateIntMinReduce(Src, false); }; - break; + return Builder.CreateIntMinReduce(Src, false); default: llvm_unreachable("Unexpected min/max reduction type"); } -break; case Instruction::FCmp: assert((RdxKind == RecurKind::FMax || RdxKind == RecurKind::FMin) && "Unexpected min/max reduction type"); if (RdxKind == RecurKind::FMax) - BuildFunc = [&]() { 
return Builder.CreateFPMaxReduce(Src); }; + return Builder.CreateFPMaxReduce(Src); else - BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src); }; -break; + return Builder.CreateFPMinReduce(Src); default: llvm_unreachable("Unhandled opcode"); } - return BuildFunc(); } Value *llvm::createTargetReduction(IRBuilderBase &B, ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
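The shape of the cleanup, reduced to a generic sketch in plain C++ (emitBefore/emitAfter and the Op enum are hypothetical, not the LoopUtils code): instead of stashing each case's work in a std::function only to invoke it once after the switch, every case returns its value directly.

#include <functional>
#include <string>

enum class Op { Add, Mul };

// Before: a temporary callable exists only to funnel every case into one
// common return at the bottom.
std::string emitBefore(Op O) {
  std::function<std::string()> Build;
  switch (O) {
  case Op::Add: Build = [] { return std::string("reduce.add"); }; break;
  case Op::Mul: Build = [] { return std::string("reduce.mul"); }; break;
  }
  return Build();
}

// After: each case returns immediately; no std::function, less code.
std::string emitAfter(Op O) {
  switch (O) {
  case Op::Add: return "reduce.add";
  case Op::Mul: return "reduce.mul";
  }
  return {}; // unreachable for valid enum values
}

int main() { return emitBefore(Op::Add) == emitAfter(Op::Add) ? 0 : 1; }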
[llvm-branch-commits] [llvm] 58b6c5d - [LoopUtils] reorder logic for creating reduction; NFC
Author: Sanjay Patel Date: 2021-01-04T16:05:02-05:00 New Revision: 58b6c5d932a0d435ddfd13f4f5b011207e64297f URL: https://github.com/llvm/llvm-project/commit/58b6c5d932a0d435ddfd13f4f5b011207e64297f DIFF: https://github.com/llvm/llvm-project/commit/58b6c5d932a0d435ddfd13f4f5b011207e64297f.diff LOG: [LoopUtils] reorder logic for creating reduction; NFC If we are using a shuffle reduction, we don't need to go through the switch on opcode - return early. Added: Modified: llvm/lib/Transforms/Utils/LoopUtils.cpp Removed: diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 96f1d4219bac..e062eacf82b2 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -982,6 +982,15 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, unsigned Opcode, Value *Src, RecurKind RdxKind, ArrayRef RedOps) { + TargetTransformInfo::ReductionFlags RdxFlags; + RdxFlags.IsMaxOp = RdxKind == RecurKind::SMax || + RdxKind == RecurKind::UMax || + RdxKind == RecurKind::FMax; + RdxFlags.IsSigned = RdxKind == RecurKind::SMax || RdxKind == RecurKind::SMin; + if (!ForceReductionIntrinsic && + !TTI->useReductionIntrinsic(Opcode, Src->getType(), RdxFlags)) +return getShuffleReduction(Builder, Src, Opcode, RdxKind, RedOps); + auto *SrcVTy = cast(Src->getType()); std::function BuildFunc; @@ -1044,15 +1053,7 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, default: llvm_unreachable("Unhandled opcode"); } - TargetTransformInfo::ReductionFlags RdxFlags; - RdxFlags.IsMaxOp = RdxKind == RecurKind::SMax || - RdxKind == RecurKind::UMax || - RdxKind == RecurKind::FMax; - RdxFlags.IsSigned = RdxKind == RecurKind::SMax || RdxKind == RecurKind::SMin; - if (ForceReductionIntrinsic || - TTI->useReductionIntrinsic(Opcode, Src->getType(), RdxFlags)) -return BuildFunc(); - return getShuffleReduction(Builder, Src, Opcode, RdxKind, RedOps); + return BuildFunc(); } Value *llvm::createTargetReduction(IRBuilderBase &B, ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] aa16903 - [test] Pin backedge-id-bug-xfail.ll to legacy PM
Author: Arthur Eubanks Date: 2021-01-04T13:09:42-08:00 New Revision: aa169033892f1f185047abc07fe6e58f726018b9 URL: https://github.com/llvm/llvm-project/commit/aa169033892f1f185047abc07fe6e58f726018b9 DIFF: https://github.com/llvm/llvm-project/commit/aa169033892f1f185047abc07fe6e58f726018b9.diff LOG: [test] Pin backedge-id-bug-xfail.ll to legacy PM The new PM doesn't have region passes, so this doesn't really make sense in a NPM context. Added: Modified: llvm/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug-xfail.ll Removed: diff --git a/llvm/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug-xfail.ll b/llvm/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug-xfail.ll index e9c54151cf29..09d0ee841a88 100644 --- a/llvm/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug-xfail.ll +++ b/llvm/test/Transforms/StructurizeCFG/AMDGPU/backedge-id-bug-xfail.ll @@ -1,5 +1,5 @@ ; XFAIL: * -; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -structurizecfg -verify-region-info %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -structurizecfg -verify-region-info %s -enable-new-pm=0 ; FIXME: Merge into backedge-id-bug ; Variant which has an issue with region construction ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 8e293fe - [NewPM][AMDGPU] Pass TargetMachine to AMDGPUSimplifyLibCallsPass
Author: Arthur Eubanks Date: 2021-01-04T13:48:09-08:00 New Revision: 8e293fe6ad06225d748bdb8a4414461451e33c16 URL: https://github.com/llvm/llvm-project/commit/8e293fe6ad06225d748bdb8a4414461451e33c16 DIFF: https://github.com/llvm/llvm-project/commit/8e293fe6ad06225d748bdb8a4414461451e33c16.diff LOG: [NewPM][AMDGPU] Pass TargetMachine to AMDGPUSimplifyLibCallsPass Missed in https://reviews.llvm.org/D93863. Added: Modified: llvm/lib/Target/AMDGPU/AMDGPU.h llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll Removed: diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 503f1022bdae..5d2189c87a1b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -77,7 +77,11 @@ FunctionPass *createAMDGPURewriteOutArgumentsPass(); FunctionPass *createSIModeRegisterPass(); struct AMDGPUSimplifyLibCallsPass : PassInfoMixin { + AMDGPUSimplifyLibCallsPass(TargetMachine &TM) : TM(TM) {} PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + +private: + TargetMachine &TM; }; struct AMDGPUUseNativeCallsPass : PassInfoMixin { diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp index eedcb2e1a793..c4d27f0a19d2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -1753,7 +1753,7 @@ bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) { PreservedAnalyses AMDGPUSimplifyLibCallsPass::run(Function &F, FunctionAnalysisManager &AM) { - AMDGPULibCalls Simplifier; + AMDGPULibCalls Simplifier(&TM); Simplifier.initNativeFuncs(); bool Changed = false; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 7e9a0ddc0fc6..be7d86d02fb0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -520,7 +520,7 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB, [this](StringRef PassName, FunctionPassManager &PM, ArrayRef) { if (PassName == "amdgpu-simplifylib") { - PM.addPass(AMDGPUSimplifyLibCallsPass()); + PM.addPass(AMDGPUSimplifyLibCallsPass(*this)); return true; } if (PassName == "amdgpu-usenative") { @@ -566,7 +566,7 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB, FPM.addPass(AMDGPUPropagateAttributesEarlyPass(*this)); FPM.addPass(AMDGPUUseNativeCallsPass()); if (EnableLibCallSimplify && Level != PassBuilder::OptimizationLevel::O0) - FPM.addPass(AMDGPUSimplifyLibCallsPass()); + FPM.addPass(AMDGPUSimplifyLibCallsPass(*this)); PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); }); diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll index 0ae7d45454ed..40bb45de25f7 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll @@ -5,6 +5,7 @@ ; RUN: opt -O3 -S < %s | FileCheck -check-prefixes=OPT,OPT-WXX %s ; RUN: opt -mtriple=amdgcn-- -O3 -S < %s | FileCheck -check-prefixes=OPT,OPT-WXX %s ; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefixes=OPT,OPT-W32 %s +; RUN: opt -mtriple=amdgcn-- -passes='default' -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefixes=OPT,OPT-W32 %s ; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefixes=OPT,OPT-W64 %s ; RUN: opt -mtriple=amdgcn-- 
-mcpu=tonga -O3 -S < %s | FileCheck -check-prefixes=OPT,OPT-W64 %s ; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize32,-wavefrontsize64 -S < %s | FileCheck -check-prefixes=OPT,OPT-W32 %s ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
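The underlying pattern: new-PM passes are ordinary value objects, so target-specific state such as the TargetMachine has to be supplied at construction time by whoever builds the pipeline. A rough standalone model in plain C++ follows; the *Model types and the string-based pipeline are invented for illustration, while the real code uses PassInfoMixin and FunctionPassManager as shown in the diff.

#include <cstdio>
#include <functional>
#include <string>
#include <vector>

struct TargetMachineModel { std::string Triple; };

struct SimplifyLibCallsPassModel {
  explicit SimplifyLibCallsPassModel(const TargetMachineModel &TM) : TM(TM) {}
  void run(const std::string &FuncName) const {
    std::printf("simplifying %s for %s\n", FuncName.c_str(), TM.Triple.c_str());
  }
private:
  const TargetMachineModel &TM; // constructor-injected target state
};

int main() {
  TargetMachineModel TM{"amdgcn--"};
  // The registration callback captures the target machine and constructs the
  // pass with it, analogous to PM.addPass(AMDGPUSimplifyLibCallsPass(*this)).
  std::vector<std::function<void(const std::string &)>> Pipeline;
  Pipeline.push_back(
      [&TM](const std::string &F) { SimplifyLibCallsPassModel(TM).run(F); });
  for (const auto &P : Pipeline)
    P("kernel_main");
  return 0;
}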
[llvm-branch-commits] [llvm] abbef2f - [ValueTracking] isGuaranteedNotToBePoison should return true on undef
Author: Juneyoung Lee Date: 2021-01-05T06:50:02+09:00 New Revision: abbef2fd46d48a0d92d86f0c00fa2973f8ae2c85 URL: https://github.com/llvm/llvm-project/commit/abbef2fd46d48a0d92d86f0c00fa2973f8ae2c85 DIFF: https://github.com/llvm/llvm-project/commit/abbef2fd46d48a0d92d86f0c00fa2973f8ae2c85.diff LOG: [ValueTracking] isGuaranteedNotToBePoison should return true on undef This is a one-line fix to isGuaranteedNotToBePoison to return true if undef is given. Added: Modified: llvm/lib/Analysis/ValueTracking.cpp llvm/unittests/Analysis/ValueTrackingTest.cpp Removed: diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 303240d03c72..e15d4f0e4b07 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -4888,7 +4888,7 @@ static bool isGuaranteedNotToBeUndefOrPoison(const Value *V, if (auto *C = dyn_cast(V)) { if (isa(C)) - return PoisonOnly; + return PoisonOnly && !isa(C); if (isa(C) || isa(C) || isa(V) || isa(C) || isa(C)) diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp b/llvm/unittests/Analysis/ValueTrackingTest.cpp index 009166a24a1f..0d6577452560 100644 --- a/llvm/unittests/Analysis/ValueTrackingTest.cpp +++ b/llvm/unittests/Analysis/ValueTrackingTest.cpp @@ -884,6 +884,10 @@ TEST_F(ValueTrackingTest, isGuaranteedNotToBeUndefOrPoison) { " ret void\n" "}\n"); EXPECT_EQ(isGuaranteedNotToBeUndefOrPoison(A), true); + EXPECT_EQ(isGuaranteedNotToBeUndefOrPoison(UndefValue::get(IntegerType::get(Context, 8))), false); + EXPECT_EQ(isGuaranteedNotToBeUndefOrPoison(PoisonValue::get(IntegerType::get(Context, 8))), false); + EXPECT_EQ(isGuaranteedNotToBePoison(UndefValue::get(IntegerType::get(Context, 8))), true); + EXPECT_EQ(isGuaranteedNotToBePoison(PoisonValue::get(IntegerType::get(Context, 8))), false); } TEST_F(ValueTrackingTest, isGuaranteedNotToBeUndefOrPoison_assume) { ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
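The distinction being exploited: poison is strictly "worse" than undef, so a query that only asks about poison may answer "guaranteed not poison" for a plain undef constant, while the combined undef-or-poison query still answers no. A toy model in plain C++ (ConstKind and notUndefOrPoisonModel are invented names; the real code queries llvm::Value constants):

#include <cassert>

enum class ConstKind { Concrete, Undef, Poison };

// PoisonOnly == false models isGuaranteedNotToBeUndefOrPoison,
// PoisonOnly == true  models isGuaranteedNotToBePoison.
static bool notUndefOrPoisonModel(ConstKind K, bool PoisonOnly) {
  switch (K) {
  case ConstKind::Concrete: return true;
  case ConstKind::Undef:    return PoisonOnly; // the fix: undef passes when we only care about poison
  case ConstKind::Poison:   return false;      // poison never passes either query
  }
  return false;
}

int main() {
  assert(notUndefOrPoisonModel(ConstKind::Undef, /*PoisonOnly=*/true));
  assert(!notUndefOrPoisonModel(ConstKind::Undef, /*PoisonOnly=*/false));
  assert(!notUndefOrPoisonModel(ConstKind::Poison, /*PoisonOnly=*/true));
  return 0;
}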
[llvm-branch-commits] [llvm] 36263a7 - [LoopUtils] remove redundant opcode parameter; NFC
Author: Sanjay Patel Date: 2021-01-04T17:05:28-05:00 New Revision: 36263a70d98afc36dea55e7a004d08455811 URL: https://github.com/llvm/llvm-project/commit/36263a70d98afc36dea55e7a004d08455811 DIFF: https://github.com/llvm/llvm-project/commit/36263a70d98afc36dea55e7a004d08455811.diff LOG: [LoopUtils] remove redundant opcode parameter; NFC While here, rename the inaccurate getRecurrenceBinOp() because that was also used to get CmpInst opcodes. The recurrence/reduction kind should always refer to the expected opcode for a reduction. SLP appears to be the only direct caller of createSimpleTargetReduction(), and that calling code ideally should not be carrying around both an opcode and a reduction kind. This should allow us to generalize reduction matching to use intrinsics instead of only binops. Added: Modified: llvm/include/llvm/Analysis/IVDescriptors.h llvm/include/llvm/Transforms/Utils/LoopUtils.h llvm/lib/Analysis/IVDescriptors.cpp llvm/lib/Transforms/Utils/LoopUtils.cpp llvm/lib/Transforms/Vectorize/LoopVectorize.cpp llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp llvm/lib/Transforms/Vectorize/VPlan.cpp Removed: diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h index 798eb430df08f..6bb6c4cae0a2c 100644 --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -139,9 +139,8 @@ class RecurrenceDescriptor { /// Returns identity corresponding to the RecurrenceKind. static Constant *getRecurrenceIdentity(RecurKind K, Type *Tp); - /// Returns the opcode of binary operation corresponding to the - /// RecurrenceKind. - static unsigned getRecurrenceBinOp(RecurKind Kind); + /// Returns the opcode corresponding to the RecurrenceKind. + static unsigned getOpcode(RecurKind Kind); /// Returns true if Phi is a reduction of type Kind and adds it to the /// RecurrenceDescriptor. If either \p DB is non-null or \p AC and \p DT are @@ -178,9 +177,7 @@ class RecurrenceDescriptor { RecurKind getRecurrenceKind() const { return Kind; } - unsigned getRecurrenceBinOp() const { -return getRecurrenceBinOp(getRecurrenceKind()); - } + unsigned getOpcode() const { return getOpcode(getRecurrenceKind()); } FastMathFlags getFastMathFlags() const { return FMF; } diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index b29add4cba0e5..d606fa954f952 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -366,8 +366,7 @@ Value *getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op, /// required to implement the reduction. /// Fast-math-flags are propagated using the IRBuilder's setting. Value *createSimpleTargetReduction(IRBuilderBase &B, - const TargetTransformInfo *TTI, - unsigned Opcode, Value *Src, + const TargetTransformInfo *TTI, Value *Src, RecurKind RdxKind, ArrayRef RedOps = None); diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index 0bd4f98541587..a11faac093db0 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -800,8 +800,7 @@ Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp) { } } -/// This function translates the recurrence kind to an LLVM binary operator. 
-unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurKind Kind) { +unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) { switch (Kind) { case RecurKind::Add: return Instruction::Add; @@ -833,7 +832,7 @@ unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurKind Kind) { SmallVector RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const { SmallVector ReductionOperations; - unsigned RedOp = getRecurrenceBinOp(Kind); + unsigned RedOp = getOpcode(Kind); // Search down from the Phi to the LoopExitInstr, looking for instructions // with a single user of the correct type for the reduction. diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 3245f5f21017f..f2b94d9e78adc 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -979,9 +979,9 @@ Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src, Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, const TargetTransformInfo *TTI, - unsigned Opcode, Value *Src, - RecurKind RdxKind, + Value *Src, Re
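The API change boils down to deriving the opcode from the reduction kind in one place rather than threading both through every caller. A schematic version in plain C++ (the *Model enums are invented and heavily trimmed; the real mapping lives in RecurrenceDescriptor::getOpcode):

#include <cassert>

enum class RecurKindModel { Add, Mul, SMax };
enum class OpcodeModel { Add, Mul, ICmp };

OpcodeModel getOpcodeModel(RecurKindModel K) {
  switch (K) {
  case RecurKindModel::Add:  return OpcodeModel::Add;
  case RecurKindModel::Mul:  return OpcodeModel::Mul;
  case RecurKindModel::SMax: return OpcodeModel::ICmp; // min/max reduce via compare+select
  }
  return OpcodeModel::Add; // unreachable for valid kinds
}

// A caller that used to pass (Opcode, RdxKind) now passes only the kind and
// lets the callee recover the opcode itself.
int main() {
  assert(getOpcodeModel(RecurKindModel::SMax) == OpcodeModel::ICmp);
  return 0;
}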
[llvm-branch-commits] [clang] f67d3db - [clang] - Also look for devtoolset-10
Author: Petr Hosek Date: 2021-01-04T14:24:46-08:00 New Revision: f67d3dbdb930eaf92668b47696e51ef0b2c3c3a5 URL: https://github.com/llvm/llvm-project/commit/f67d3dbdb930eaf92668b47696e51ef0b2c3c3a5 DIFF: https://github.com/llvm/llvm-project/commit/f67d3dbdb930eaf92668b47696e51ef0b2c3c3a5.diff LOG: [clang] - Also look for devtoolset-10 devtoolset-10 has just been released so look for it as well. Patch By: stephan.dollberg Differential Revision: https://reviews.llvm.org/D92792 Added: Modified: clang/lib/Driver/ToolChains/Gnu.cpp Removed: diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index 4a075b12d1af..336ee13b2df5 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -2049,6 +2049,7 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( // Non-Solaris is much simpler - most systems just go with "/usr". if (SysRoot.empty() && TargetTriple.getOS() == llvm::Triple::Linux) { // Yet, still look for RHEL devtoolsets. +Prefixes.push_back("/opt/rh/devtoolset-10/root/usr"); Prefixes.push_back("/opt/rh/devtoolset-9/root/usr"); Prefixes.push_back("/opt/rh/devtoolset-8/root/usr"); Prefixes.push_back("/opt/rh/devtoolset-7/root/usr"); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] b4f519b - [NFCI] DwarfEHPrepare: update DomTree in non-permissive mode, when present
Author: Roman Lebedev Date: 2021-01-05T01:26:36+03:00 New Revision: b4f519bddda853443405d21728154c481837e18b URL: https://github.com/llvm/llvm-project/commit/b4f519bddda853443405d21728154c481837e18b DIFF: https://github.com/llvm/llvm-project/commit/b4f519bddda853443405d21728154c481837e18b.diff LOG: [NFCI] DwarfEHPrepare: update DomTree in non-permissive mode, when present Being stricter will catch issues that would be just papered over in permissive mode, and is likely faster. Added: Modified: llvm/lib/CodeGen/DwarfEHPrepare.cpp Removed: diff --git a/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/llvm/lib/CodeGen/DwarfEHPrepare.cpp index c1b764214546..97e0162f35a1 100644 --- a/llvm/lib/CodeGen/DwarfEHPrepare.cpp +++ b/llvm/lib/CodeGen/DwarfEHPrepare.cpp @@ -243,7 +243,7 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() { new UnreachableInst(Ctx, UnwindBB); if (DTU && RequireAndPreserveDomTree) -DTU->applyUpdatesPermissive(Updates); +DTU->applyUpdates(Updates); return true; } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
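The behavioural difference between the two update APIs, in a small self-contained model (plain C++, with a set of integer edges standing in for DomTreeUpdater and basic blocks): the permissive variant silently drops updates that change nothing, while the strict variant treats them as caller bugs, which both catches stale bookkeeping and avoids the extra filtering cost. The SimplifyCFG patches below make the same switch, case by case.

#include <cassert>
#include <set>
#include <utility>
#include <vector>

using Edge = std::pair<int, int>; // (From, To) basic-block IDs
enum class Kind { Insert, Delete };
struct Update { Kind K; Edge E; };

struct DomTreeModel {
  std::set<Edge> Edges;

  // Strict: every update must describe a real state change, exactly once.
  void applyUpdates(const std::vector<Update> &Updates) {
    for (const Update &U : Updates) {
      bool Changed = (U.K == Kind::Insert) ? Edges.insert(U.E).second
                                           : Edges.erase(U.E) == 1;
      assert(Changed && "stale or duplicate update -- caller bug");
      (void)Changed;
    }
  }

  // Permissive: quietly ignore updates that change nothing (slower, hides bugs).
  void applyUpdatesPermissive(const std::vector<Update> &Updates) {
    for (const Update &U : Updates) {
      if (U.K == Kind::Insert) Edges.insert(U.E);
      else                     Edges.erase(U.E);
    }
  }
};

int main() {
  DomTreeModel DT{{{1, 2}, {1, 3}}};
  // Exact updates, as DwarfEHPrepare now emits them: fine in strict mode.
  DT.applyUpdates({{Kind::Delete, {1, 3}}, {Kind::Insert, {1, 4}}});
  // A repeated deletion would be swallowed by the permissive API but asserts
  // in the strict one:
  // DT.applyUpdates({{Kind::Delete, {1, 3}}});   // would trip the assert
  return 0;
}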
[llvm-branch-commits] [llvm] 3fb5722 - [NFCI] SimplifyCFG: switch to non-permissive DomTree updates, where possible
Author: Roman Lebedev Date: 2021-01-05T01:26:36+03:00 New Revision: 3fb57222c4c0db02f13f32579fb83d0d488becad URL: https://github.com/llvm/llvm-project/commit/3fb57222c4c0db02f13f32579fb83d0d488becad DIFF: https://github.com/llvm/llvm-project/commit/3fb57222c4c0db02f13f32579fb83d0d488becad.diff LOG: [NFCI] SimplifyCFG: switch to non-permissive DomTree updates, where possible Notably, this doesn't switch *every* case, remaining cases don't actually pass sanity checks in non-permissve mode, and therefore require further analysis. Note that SimplifyCFG still defaults to not preserving DomTree by default, so this is effectively a NFC change. Added: Modified: llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp llvm/lib/Transforms/Utils/SimplifyCFG.cpp Removed: diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 3efdc0e9ea86..2c3454c46b30 100644 --- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -77,6 +77,7 @@ STATISTIC(NumSimpl, "Number of blocks simplified"); /// If we have more than one empty (other than phi node) return blocks, /// merge them together to promote recursive block merging. +// FIXME: switch to non-permissive DomTreeUpdater::applyUpdates(). static bool mergeEmptyReturnBlocks(Function &F, DomTreeUpdater *DTU) { bool Changed = false; diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index a4faf22b8294..567b2e02b71c 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -872,6 +872,7 @@ static void setBranchWeights(Instruction *I, uint32_t TrueWeight, /// also a value comparison with the same value, and if that comparison /// determines the outcome of this comparison. If so, simplify TI. This does a /// very limited form of jump threading. +// FIXME: switch to non-permissive DomTreeUpdater::applyUpdates(). bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) { Value *PredVal = isValueEqualityComparison(Pred->getTerminator()); @@ -924,7 +925,7 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( EraseTerminatorAndDCECond(TI); if (DTU) -DTU->applyUpdatesPermissive( +DTU->applyUpdates( {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}}); return true; @@ -956,7 +957,7 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( if (I.second == 0) Updates.push_back({DominatorTree::Delete, PredDef, I.first}); if (DTU) - DTU->applyUpdatesPermissive(Updates); + DTU->applyUpdates(Updates); LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n"); return true; @@ -1080,6 +1081,7 @@ static void FitWeights(MutableArrayRef Weights) { /// (either a switch or a branch on "X == c"). /// See if any of the predecessors of the terminator block are value comparisons /// on the same value. If so, and if safe to do so, fold them together. +// FIXME: switch to non-permissive DomTreeUpdater::applyUpdates(). 
bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI, IRBuilder<> &Builder) { BasicBlock *BB = TI->getParent(); @@ -1554,7 +1556,7 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, EraseTerminatorAndDCECond(BI); if (DTU) -DTU->applyUpdatesPermissive(Updates); +DTU->applyUpdates(Updates); return Changed; } @@ -2488,7 +2490,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, DomTreeUpdater *DTU, Updates.push_back({DominatorTree::Insert, PredBB, EdgeBB}); if (DTU) - DTU->applyUpdatesPermissive(Updates); + DTU->applyUpdates(Updates); // Recurse, simplifying any other constants. return FoldCondBranchOnPHI(BI, DTU, DL, AC) || true; @@ -2660,7 +2662,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, OldTI->eraseFromParent(); if (DTU) -DTU->applyUpdatesPermissive(Updates); +DTU->applyUpdates(Updates); return true; } @@ -2668,6 +2670,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, /// If we found a conditional branch that goes to two returning blocks, /// try to merge them together into one return, /// introducing a select if the return values disagree. +// FIXME: switch to non-permissive DomTreeUpdater::applyUpdates(). bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI, IRBuilder<> &Builder) { auto *BB = BI->getParent(); @@ -3169,7 +3172,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU, } } // Update PHI Node. -
[llvm-branch-commits] [llvm] ed9de61 - [SimplifyCFGPass] mergeEmptyReturnBlocks(): switch to non-permissive DomTree updates
Author: Roman Lebedev Date: 2021-01-05T01:26:36+03:00 New Revision: ed9de61cc3e280f84e3f0f98a49af21c7e59c4c9 URL: https://github.com/llvm/llvm-project/commit/ed9de61cc3e280f84e3f0f98a49af21c7e59c4c9 DIFF: https://github.com/llvm/llvm-project/commit/ed9de61cc3e280f84e3f0f98a49af21c7e59c4c9.diff LOG: [SimplifyCFGPass] mergeEmptyReturnBlocks(): switch to non-permissive DomTree updates ... which requires not inserting an edge that already exists. Added: Modified: llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp Removed: diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 2c3454c46b30..c0edde8648f5 100644 --- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -77,7 +77,6 @@ STATISTIC(NumSimpl, "Number of blocks simplified"); /// If we have more than one empty (other than phi node) return blocks, /// merge them together to promote recursive block merging. -// FIXME: switch to non-permissive DomTreeUpdater::applyUpdates(). static bool mergeEmptyReturnBlocks(Function &F, DomTreeUpdater *DTU) { bool Changed = false; @@ -143,7 +142,10 @@ static bool mergeEmptyReturnBlocks(Function &F, DomTreeUpdater *DTU) { if (DTU) { for (auto *Predecessor : predecessors(&BB)) { Updates.push_back({DominatorTree::Delete, Predecessor, &BB}); - Updates.push_back({DominatorTree::Insert, Predecessor, RetBlock}); + // But, iff Predecessor already branches to RetBlock, + // don't (re-)add DomTree edge, because it already exists. + if (!is_contained(successors(Predecessor), RetBlock)) +Updates.push_back({DominatorTree::Insert, Predecessor, RetBlock}); } } BB.replaceAllUsesWith(RetBlock); @@ -176,7 +178,7 @@ static bool mergeEmptyReturnBlocks(Function &F, DomTreeUpdater *DTU) { } if (DTU) { -DTU->applyUpdatesPermissive(Updates); +DTU->applyUpdates(Updates); for (auto *BB : DeadBlocks) DTU->deleteBB(BB); } else { ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 110b3d7 - [SimplifyCFG] SimplifyEqualityComparisonWithOnlyPredecessor(): switch to non-permissive DomTree updates
Author: Roman Lebedev Date: 2021-01-05T01:26:37+03:00 New Revision: 110b3d7855ef71a7d43a0779b2e1c32e1a31daae URL: https://github.com/llvm/llvm-project/commit/110b3d7855ef71a7d43a0779b2e1c32e1a31daae DIFF: https://github.com/llvm/llvm-project/commit/110b3d7855ef71a7d43a0779b2e1c32e1a31daae.diff LOG: [SimplifyCFG] SimplifyEqualityComparisonWithOnlyPredecessor(): switch to non-permissive DomTree updates ... which requires not deleting an edge that just got deleted. Added: Modified: llvm/lib/Transforms/Utils/SimplifyCFG.cpp Removed: diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 60fa8a876b53..a61e48d84f01 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -872,7 +872,6 @@ static void setBranchWeights(Instruction *I, uint32_t TrueWeight, /// also a value comparison with the same value, and if that comparison /// determines the outcome of this comparison. If so, simplify TI. This does a /// very limited form of jump threading. -// FIXME: switch to non-permissive DomTreeUpdater::applyUpdates(). bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) { Value *PredVal = isValueEqualityComparison(Pred->getTerminator()); @@ -988,14 +987,14 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( if (!TheRealDest) TheRealDest = ThisDef; - SmallVector Updates; + SmallSetVector RemovedSuccs; // Remove PHI node entries for dead edges. BasicBlock *CheckEdge = TheRealDest; for (BasicBlock *Succ : successors(TIBB)) if (Succ != CheckEdge) { + RemovedSuccs.insert(Succ); Succ->removePredecessor(TIBB); - Updates.push_back({DominatorTree::Delete, TIBB, Succ}); } else CheckEdge = nullptr; @@ -1008,8 +1007,13 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor( << "\n"); EraseTerminatorAndDCECond(TI); - if (DTU) -DTU->applyUpdatesPermissive(Updates); + if (DTU) { +SmallVector Updates; +Updates.reserve(RemovedSuccs.size()); +for (auto *RemovedSucc : RemovedSuccs) + Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc}); +DTU->applyUpdates(Updates); + } return true; } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
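The recurring trick in these SimplifyCFG changes: a terminator can name the same successor more than once, so the removed successors are first collected into an order-preserving set and only then turned into one Delete update each, which is what the strict applyUpdates() requires. A reduced sketch in plain C++ (std::set stands in for SmallSetVector, and block IDs are just ints here):

#include <cassert>
#include <set>
#include <vector>

using Block = int;

// Collect each removed successor once, in first-seen order.
std::vector<Block> uniqueRemovedSuccs(const std::vector<Block> &Succs,
                                      Block Kept) {
  std::set<Block> Seen;
  std::vector<Block> Removed;
  for (Block S : Succs)
    if (S != Kept && Seen.insert(S).second)
      Removed.push_back(S);
  return Removed; // one {DominatorTree::Delete, BB, Succ} per element
}

int main() {
  // A degenerate terminator can list a successor twice (for example, a
  // conditional branch whose true and false destinations match).
  std::vector<Block> Succs = {2, 3, 3, 4};
  assert((uniqueRemovedSuccs(Succs, /*Kept=*/4) == std::vector<Block>{2, 3}));
  return 0;
}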
[llvm-branch-commits] [llvm] a8604e3 - [SimplifyCFG] simplifyIndirectBr(): switch to non-permissive DomTree updates
Author: Roman Lebedev Date: 2021-01-05T01:26:36+03:00 New Revision: a8604e3d5b7112da11508f100805d65a4eddeb33 URL: https://github.com/llvm/llvm-project/commit/a8604e3d5b7112da11508f100805d65a4eddeb33 DIFF: https://github.com/llvm/llvm-project/commit/a8604e3d5b7112da11508f100805d65a4eddeb33.diff LOG: [SimplifyCFG] simplifyIndirectBr(): switch to non-permissive DomTree updates ... which requires not deleting an edge that just got deleted. Added: Modified: llvm/lib/Transforms/Utils/SimplifyCFG.cpp Removed: diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 567b2e02b71c..60fa8a876b53 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -6218,19 +6218,18 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { return false; } -// FIXME: switch to non-permissive DomTreeUpdater::applyUpdates(). bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) { BasicBlock *BB = IBI->getParent(); bool Changed = false; // Eliminate redundant destinations. - std::vector Updates; SmallPtrSet Succs; + SmallSetVector RemovedSuccs; for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { BasicBlock *Dest = IBI->getDestination(i); if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) { if (!Dest->hasAddressTaken()) -Updates.push_back({DominatorTree::Delete, BB, Dest}); +RemovedSuccs.insert(Dest); Dest->removePredecessor(BB); IBI->removeDestination(i); --i; @@ -6239,9 +6238,13 @@ bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) { } } - if (DTU) -DTU->applyUpdatesPermissive(Updates); - Updates.clear(); + if (DTU) { +std::vector Updates; +Updates.reserve(RemovedSuccs.size()); +for (auto *RemovedSucc : RemovedSuccs) + Updates.push_back({DominatorTree::Delete, BB, RemovedSucc}); +DTU->applyUpdates(Updates); + } if (IBI->getNumDestinations() == 0) { // If the indirectbr has no successors, change it to unreachable. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 32c47eb - [SimplifyCFG] SimplifyCondBranchToTwoReturns(): switch to non-permissive DomTree updates
Author: Roman Lebedev Date: 2021-01-05T01:26:37+03:00 New Revision: 32c47ebef18d904445ce909e4a6922ffbfe4053f URL: https://github.com/llvm/llvm-project/commit/32c47ebef18d904445ce909e4a6922ffbfe4053f DIFF: https://github.com/llvm/llvm-project/commit/32c47ebef18d904445ce909e4a6922ffbfe4053f.diff LOG: [SimplifyCFG] SimplifyCondBranchToTwoReturns(): switch to non-permissive DomTree updates ... which requires not deleting an edge that just got deleted, because we could be dealing with a block that didn't go through ConstantFoldTerminator() yet, and thus has a degenerate cond br with matching true/false destinations. Added: Modified: llvm/lib/Transforms/Utils/SimplifyCFG.cpp Removed: diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index a61e48d84f01..9f808278d899 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -2674,13 +2674,13 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, /// If we found a conditional branch that goes to two returning blocks, /// try to merge them together into one return, /// introducing a select if the return values disagree. -// FIXME: switch to non-permissive DomTreeUpdater::applyUpdates(). bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI, IRBuilder<> &Builder) { auto *BB = BI->getParent(); assert(BI->isConditional() && "Must be a conditional branch"); BasicBlock *TrueSucc = BI->getSuccessor(0); BasicBlock *FalseSucc = BI->getSuccessor(1); + // NOTE: destinations may match, this could be degenerate uncond branch. ReturnInst *TrueRet = cast(TrueSucc->getTerminator()); ReturnInst *FalseRet = cast(FalseSucc->getTerminator()); @@ -2702,8 +2702,11 @@ bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI, Builder.CreateRetVoid(); EraseTerminatorAndDCECond(BI); if (DTU) { - DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, TrueSucc}, - {DominatorTree::Delete, BB, FalseSucc}}); + SmallVector Updates; + Updates.push_back({DominatorTree::Delete, BB, TrueSucc}); + if (TrueSucc != FalseSucc) +Updates.push_back({DominatorTree::Delete, BB, FalseSucc}); + DTU->applyUpdates(Updates); } return true; } @@ -2761,10 +2764,12 @@ bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI, << *TrueSucc << "\nFALSEBLOCK: " << *FalseSucc); EraseTerminatorAndDCECond(BI); - if (DTU) { -DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, TrueSucc}, - {DominatorTree::Delete, BB, FalseSucc}}); +SmallVector Updates; +Updates.push_back({DominatorTree::Delete, BB, TrueSucc}); +if (TrueSucc != FalseSucc) + Updates.push_back({DominatorTree::Delete, BB, FalseSucc}); +DTU->applyUpdates(Updates); } return true; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] e30fbbe - [JumpThreading][NewPM] Skip when target has divergent CF
Author: Arthur Eubanks Date: 2021-01-04T16:08:08-08:00 New Revision: e30fbbe9a5359f5d88fbc6045f320a120fc9a5af URL: https://github.com/llvm/llvm-project/commit/e30fbbe9a5359f5d88fbc6045f320a120fc9a5af DIFF: https://github.com/llvm/llvm-project/commit/e30fbbe9a5359f5d88fbc6045f320a120fc9a5af.diff LOG: [JumpThreading][NewPM] Skip when target has divergent CF Matches the legacy pass. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D94028 Added: Modified: llvm/lib/Transforms/Scalar/JumpThreading.cpp llvm/test/Transforms/JumpThreading/divergent-target-test.ll Removed: diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp index ce191144297b..e8de6f425db6 100644 --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -341,6 +341,10 @@ bool JumpThreading::runOnFunction(Function &F) { PreservedAnalyses JumpThreadingPass::run(Function &F, FunctionAnalysisManager &AM) { + auto &TTI = AM.getResult(F); + // Jump Threading has no sense for the targets with divergent CF + if (TTI.hasBranchDivergence()) +return PreservedAnalyses::all(); auto &TLI = AM.getResult(F); auto &DT = AM.getResult(F); auto &LVI = AM.getResult(F); diff --git a/llvm/test/Transforms/JumpThreading/divergent-target-test.ll b/llvm/test/Transforms/JumpThreading/divergent-target-test.ll index 4f7d237691c8..34060fbb09d9 100644 --- a/llvm/test/Transforms/JumpThreading/divergent-target-test.ll +++ b/llvm/test/Transforms/JumpThreading/divergent-target-test.ll @@ -1,6 +1,8 @@ ; REQUIRES: amdgpu-registered-target && x86-registered-target ; RUN: opt < %s -mtriple=amdgcn -jump-threading -S | FileCheck %s -check-prefixes=CHECK,DIVERGENT +; RUN: opt < %s -mtriple=amdgcn -passes=jump-threading -S | FileCheck %s -check-prefixes=CHECK,DIVERGENT ; RUN: opt < %s -mtriple=x86_64 -jump-threading -S | FileCheck %s -check-prefixes=CHECK,UNIFORM +; RUN: opt < %s -mtriple=x86_64 -passes=jump-threading -S | FileCheck %s -check-prefixes=CHECK,UNIFORM ; Here we assure that for the target with no branch divergence usual Jump Threading optimization performed ; For target with branch divergence - no optimization, so the IR is unchanged. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
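A bare-bones model of the added guard (plain C++; TargetInfoModel and Preserved are invented stand-ins for TargetTransformInfo and PreservedAnalyses): when the target reports divergent control flow, run() returns immediately and declares everything preserved, matching what the legacy pass wrapper already did.

#include <cstdio>

struct TargetInfoModel { bool HasBranchDivergence; };
enum class Preserved { All, None };

Preserved runJumpThreadingModel(const TargetInfoModel &TTI) {
  // Jump threading makes no sense for targets with divergent control flow.
  if (TTI.HasBranchDivergence)
    return Preserved::All;          // skip: IR untouched
  // ... the normal jump-threading work would go here ...
  return Preserved::None;           // IR changed, analyses invalidated
}

int main() {
  std::printf("amdgcn: %s\n",
              runJumpThreadingModel({true}) == Preserved::All ? "skipped" : "ran");
  std::printf("x86_64: %s\n",
              runJumpThreadingModel({false}) == Preserved::All ? "skipped" : "ran");
  return 0;
}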
[llvm-branch-commits] [llvm] c4f12a0 - [WebAssembly] Remove old SDT_WebAssemblyCalls (NFC)
Author: Heejin Ahn Date: 2021-01-04T16:31:16-08:00 New Revision: c4f12a07a44c1d5ae10eb8763f0f596837e2085e URL: https://github.com/llvm/llvm-project/commit/c4f12a07a44c1d5ae10eb8763f0f596837e2085e DIFF: https://github.com/llvm/llvm-project/commit/c4f12a07a44c1d5ae10eb8763f0f596837e2085e.diff LOG: [WebAssembly] Remove old SDT_WebAssemblyCalls (NFC) These are not used anymore. Reviewed By: tlively Differential Revision: https://reviews.llvm.org/D94036 Added: Modified: llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td Removed: diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td index 5f421e24123a..40a6545530be 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -74,8 +74,6 @@ def SDT_WebAssemblyCallSeqStart : SDCallSeqStart<[SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>; def SDT_WebAssemblyCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>; -def SDT_WebAssemblyCall0 : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; -def SDT_WebAssemblyCall1 : SDTypeProfile<1, -1, [SDTCisPtrTy<1>]>; def SDT_WebAssemblyBrTable: SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; def SDT_WebAssemblyArgument : SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>; def SDT_WebAssemblyReturn : SDTypeProfile<0, -1, []>; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] f28b026 - [InstSimplify] add a test for gep with poison operand (NFC)
Author: Juneyoung Lee Date: 2021-01-05T11:03:11+09:00 New Revision: f28b026d32ca20208e24cab3aabb9d9fb0f1948a URL: https://github.com/llvm/llvm-project/commit/f28b026d32ca20208e24cab3aabb9d9fb0f1948a DIFF: https://github.com/llvm/llvm-project/commit/f28b026d32ca20208e24cab3aabb9d9fb0f1948a.diff LOG: [InstSimplify] add a test for gep with poison operand (NFC) Added: Modified: llvm/test/Transforms/InstSimplify/gep.ll Removed: diff --git a/llvm/test/Transforms/InstSimplify/gep.ll b/llvm/test/Transforms/InstSimplify/gep.ll index f60f0cb7fdca..804ce7f10a8c 100644 --- a/llvm/test/Transforms/InstSimplify/gep.ll +++ b/llvm/test/Transforms/InstSimplify/gep.ll @@ -218,7 +218,7 @@ define @ptr_idx_mix_scalar_scalable_vector() { ; Check ConstantExpr::getGetElementPtr() using ElementCount for size queries - end. -; TODO: this should return poison +; TODO: these should return poison define i8* @poison() { ; CHECK-LABEL: @poison( @@ -227,3 +227,12 @@ define i8* @poison() { %v = getelementptr i8, i8* poison, i64 1 ret i8* %v } + +define i8* @poison2(i8* %baseptr) { +; CHECK-LABEL: @poison2( +; CHECK-NEXT:[[V:%.*]] = getelementptr i8, i8* [[BASEPTR:%.*]], i64 poison +; CHECK-NEXT:ret i8* [[V]] +; + %v = getelementptr i8, i8* %baseptr, i64 poison + ret i8* %v +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] f665a8c - [InstSimplify] gep with poison operand is poison
Author: Juneyoung Lee Date: 2021-01-05T11:07:49+09:00 New Revision: f665a8c5b8b42d88f89e1a3594b7d410ef206c32 URL: https://github.com/llvm/llvm-project/commit/f665a8c5b8b42d88f89e1a3594b7d410ef206c32 DIFF: https://github.com/llvm/llvm-project/commit/f665a8c5b8b42d88f89e1a3594b7d410ef206c32.diff LOG: [InstSimplify] gep with poison operand is poison This is a tiny update to fold gep poison into poison. :) Alive2 proofs: https://alive2.llvm.org/ce/z/7Nwdri https://alive2.llvm.org/ce/z/sDP4sC Added: Modified: llvm/lib/Analysis/InstructionSimplify.cpp llvm/test/Transforms/InstSimplify/gep.ll Removed: diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index dfaf36a96953..659b71fae6a0 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -4242,6 +4242,11 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef Ops, else if (VectorType *VT = dyn_cast(Ops[1]->getType())) GEPTy = VectorType::get(GEPTy, VT->getElementCount()); + // getelementptr poison, idx -> poison + // getelementptr baseptr, poison -> poison + if (any_of(Ops, [](const auto *V) { return isa(V); })) +return PoisonValue::get(GEPTy); + if (Q.isUndefValue(Ops[0])) return UndefValue::get(GEPTy); diff --git a/llvm/test/Transforms/InstSimplify/gep.ll b/llvm/test/Transforms/InstSimplify/gep.ll index 804ce7f10a8c..e6670e4a9345 100644 --- a/llvm/test/Transforms/InstSimplify/gep.ll +++ b/llvm/test/Transforms/InstSimplify/gep.ll @@ -222,7 +222,7 @@ define @ptr_idx_mix_scalar_scalable_vector() { define i8* @poison() { ; CHECK-LABEL: @poison( -; CHECK-NEXT:ret i8* undef +; CHECK-NEXT:ret i8* poison ; %v = getelementptr i8, i8* poison, i64 1 ret i8* %v @@ -230,8 +230,7 @@ define i8* @poison() { define i8* @poison2(i8* %baseptr) { ; CHECK-LABEL: @poison2( -; CHECK-NEXT:[[V:%.*]] = getelementptr i8, i8* [[BASEPTR:%.*]], i64 poison -; CHECK-NEXT:ret i8* [[V]] +; CHECK-NEXT:ret i8* poison ; %v = getelementptr i8, i8* %baseptr, i64 poison ret i8* %v ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
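The new fold, reduced to a toy constant folder in plain C++ (Val and simplifyGEPModel are invented names; the real change sits in SimplifyGEPInst): any poison operand, base pointer or index, makes the whole getelementptr poison, while an undef base keeps folding to undef as before.

#include <algorithm>
#include <cassert>
#include <vector>

enum class Val { Concrete, Undef, Poison };

Val simplifyGEPModel(const std::vector<Val> &Ops) {
  // getelementptr poison, idx -> poison ; getelementptr base, poison -> poison
  if (std::any_of(Ops.begin(), Ops.end(),
                  [](Val V) { return V == Val::Poison; }))
    return Val::Poison;
  // getelementptr undef, idx -> undef (pre-existing rule for the base pointer)
  if (Ops.front() == Val::Undef)
    return Val::Undef;
  return Val::Concrete;
}

int main() {
  assert(simplifyGEPModel({Val::Poison, Val::Concrete}) == Val::Poison);
  assert(simplifyGEPModel({Val::Concrete, Val::Poison}) == Val::Poison);
  assert(simplifyGEPModel({Val::Undef, Val::Concrete}) == Val::Undef);
  return 0;
}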
[llvm-branch-commits] [llvm] ae61485 - [UpdateTestChecks] Fix PowerPC RE to support AIX assembly
Author: Qiu Chaofan Date: 2021-01-05T10:28:00+08:00 New Revision: ae614851631387f86cb7ab1f33a4851a6549c279 URL: https://github.com/llvm/llvm-project/commit/ae614851631387f86cb7ab1f33a4851a6549c279 DIFF: https://github.com/llvm/llvm-project/commit/ae614851631387f86cb7ab1f33a4851a6549c279.diff LOG: [UpdateTestChecks] Fix PowerPC RE to support AIX assembly Current update_llc_test_checks.py cannot generate checks for AIX (powerpc64-ibm-aix-xcoff) properly. Assembly generated is little bit different from Linux. So I use begin function comment here to capture function name. Reviewed By: MaskRay, steven.zhang Differential Revision: https://reviews.llvm.org/D93676 Added: Modified: llvm/test/CodeGen/PowerPC/aix-lr.ll llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/ppc_generated_funcs.ll llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/ppc_generated_funcs.ll.generated.expected llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/ppc_generated_funcs.ll.nogenerated.expected llvm/utils/UpdateTestChecks/asm.py Removed: diff --git a/llvm/test/CodeGen/PowerPC/aix-lr.ll b/llvm/test/CodeGen/PowerPC/aix-lr.ll index ea92daf04f07..38ebf297e591 100644 --- a/llvm/test/CodeGen/PowerPC/aix-lr.ll +++ b/llvm/test/CodeGen/PowerPC/aix-lr.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff < %s | \ ; RUN: FileCheck --check-prefix=32BIT %s @@ -5,25 +6,32 @@ ; RUN: FileCheck --check-prefix=64BIT %s define void @bar() { +; 32BIT-LABEL: bar: +; 32BIT: # %bb.0: # %entry +; 32BIT-NEXT:mflr 0 +; 32BIT-NEXT:stw 0, 8(1) +; 32BIT-NEXT:stwu 1, -64(1) +; 32BIT-NEXT:bl .foo[PR] +; 32BIT-NEXT:nop +; 32BIT-NEXT:addi 1, 1, 64 +; 32BIT-NEXT:lwz 0, 8(1) +; 32BIT-NEXT:mtlr 0 +; 32BIT-NEXT:blr +; +; 64BIT-LABEL: bar: +; 64BIT: # %bb.0: # %entry +; 64BIT-NEXT:mflr 0 +; 64BIT-NEXT:std 0, 16(1) +; 64BIT-NEXT:stdu 1, -112(1) +; 64BIT-NEXT:bl .foo[PR] +; 64BIT-NEXT:nop +; 64BIT-NEXT:addi 1, 1, 112 +; 64BIT-NEXT:ld 0, 16(1) +; 64BIT-NEXT:mtlr 0 +; 64BIT-NEXT:blr entry: -; 32BIT: mflr 0 -; 32BIT: stw 0, 8(1) -; 32BIT: stwu 1, -64(1) -; 32BIT: bl .foo -; 32BIT: nop -; 32BIT: addi 1, 1, 64 -; 32BIT: lwz 0, 8(1) -; 32BIT: mtlr 0 -; 64BIT: mflr 0 -; 64BIT: std 0, 16(1) -; 64BIT: stdu 1, -112(1) -; 64BIT: bl .foo -; 64BIT: nop -; 64BIT: addi 1, 1, 112 -; 64BIT: ld 0, 16(1) -; 64BIT: mtlr 0 call void bitcast (void (...)* @foo to void ()*)() ret void diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/ppc_generated_funcs.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/ppc_generated_funcs.ll index cd545199697f..d31a3361635a 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/ppc_generated_funcs.ll +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/ppc_generated_funcs.ll @@ -1,4 +1,5 @@ ; RUN: llc -enable-machine-outliner -mtriple=ppc32-unknown-linux < %s | FileCheck %s +; RUN: llc -enable-machine-outliner -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck %s -check-prefix=AIX ; ; NOTE: Machine outliner doesn't run. 
@x = global i32 0, align 4 diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/ppc_generated_funcs.ll.generated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/ppc_generated_funcs.ll.generated.expected index 57298fa6e019..1fca598d23fe 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/ppc_generated_funcs.ll.generated.expected +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/ppc_generated_funcs.ll.generated.expected @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --include-generated-funcs ; RUN: llc -enable-machine-outliner -mtriple=ppc32-unknown-linux < %s | FileCheck %s +; RUN: llc -enable-machine-outliner -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck %s -check-prefix=AIX ; NOTE: Machine outliner doesn't run. @x = global i32 0, align 4 @@ -127,3 +128,62 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" } ; CHECK-NEXT:lwz 31, 28(1) ; CHECK-NEXT:addi 1, 1, 32 ; CHECK-NEXT:blr +; +; AIX-LABEL: check_boundaries: +; AIX: # %bb.0: +; AIX-NEXT:stw 31, -4(1) +; AIX-NEXT:stwu 1, -48(1) +; AIX-NEXT:mr 31, 1 +; AIX-NEXT:li 4, 0 +; AIX-NEXT:li 3, 1 +; AIX-NEXT:stw 4, 40(31) +; AIX-NEXT:li 4, 2 +; AIX-NEXT:li 5, 3 +; AIX-NEXT:li 6, 4 +; AIX-NEXT:cmplwi 3, 0 +; AIX-NEXT:stw 3, 36(31) +; AIX-NEXT:stw 4, 32(31) +; AIX-NEXT:stw 5, 28(31) +; AIX-NEXT:stw 6, 24(31) +; AIX-NEXT:beq 0, L..BB0_2 +; AIX-NEXT: # %bb.1: +; AIX-NEXT:stw
[llvm-branch-commits] [llvm] d51d72b - [RISCV] Rename RVV intrinsics class (NFC)
Author: Evandro Menezes Date: 2021-01-04T20:32:30-06:00 New Revision: d51d72bbb91bf1179e58a40998efb3be6bb1ca6f URL: https://github.com/llvm/llvm-project/commit/d51d72bbb91bf1179e58a40998efb3be6bb1ca6f DIFF: https://github.com/llvm/llvm-project/commit/d51d72bbb91bf1179e58a40998efb3be6bb1ca6f.diff LOG: [RISCV] Rename RVV intrinsics class (NFC) Rename the class `RISCVUnaryAAMask` to `RISCVBinaryAAAMask`, since it has two input arguments. Added: Modified: llvm/include/llvm/IR/IntrinsicsRISCV.td Removed: diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index ebb93ffcfc12..fe0d6b00a3c2 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -189,19 +189,19 @@ let TargetPrefix = "riscv" in { LLVMPointerType>, llvm_anyvector_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], [NoCapture>, IntrWriteMem]>, RISCVVIntrinsic; - // For destination vector type is the same as first source vector (with mask). - // Input: (maskedoff, vector_in, mask, vl) - class RISCVUnaryAAMask -: Intrinsic<[llvm_anyvector_ty], -[LLVMMatchType<0>, LLVMMatchType<0>, - LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], -[IntrNoMem]>, RISCVVIntrinsic; // For destination vector type is the same as first and second source vector. // Input: (vector_in, vector_in, vl) class RISCVBinaryAAANoMask : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty], [IntrNoMem]>, RISCVVIntrinsic; + // For destination vector type is the same as first and second source vector. + // Input: (vector_in, vector_in, vl) + class RISCVBinaryAAAMask +: Intrinsic<[llvm_anyvector_ty], +[LLVMMatchType<0>, LLVMMatchType<0>, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty], +[IntrNoMem]>, RISCVVIntrinsic; // For destination vector type is the same as first source vector. // Input: (vector_in, vector_in/scalar_in, vl) class RISCVBinaryAAXNoMask @@ -704,7 +704,7 @@ let TargetPrefix = "riscv" in { defm vrgather : RISCVBinaryAAX; - def "int_riscv_vcompress_mask" : RISCVUnaryAAMask; + def "int_riscv_vcompress_mask" : RISCVBinaryAAAMask; defm vaaddu : RISCVSaturatingBinaryAAX; defm vaadd : RISCVSaturatingBinaryAAX; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 2962f11 - [NFC] Add the getSizeInBytes() interface for MachineConstantPoolValue
Author: QingShan Zhang Date: 2021-01-05T03:22:45Z New Revision: 2962f1149c8fccf8e865654ce11b3f1312165651 URL: https://github.com/llvm/llvm-project/commit/2962f1149c8fccf8e865654ce11b3f1312165651 DIFF: https://github.com/llvm/llvm-project/commit/2962f1149c8fccf8e865654ce11b3f1312165651.diff LOG: [NFC] Add the getSizeInBytes() interface for MachineConstantPoolValue Current implementation assumes that, each MachineConstantPoolValue takes up sizeof(MachineConstantPoolValue::Ty) bytes. For PowerPC, we want to lump all the constants with the same type as one MachineConstantPoolValue to save the cost that calculate the TOC entry for each const. So, we need to extend the MachineConstantPoolValue that break this assumption. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D89108 Added: Modified: llvm/include/llvm/CodeGen/MachineConstantPool.h llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp llvm/lib/CodeGen/MachineFunction.cpp llvm/lib/Target/ARM/ARMConstantIslandPass.cpp llvm/lib/Target/Mips/MipsConstantIslandPass.cpp llvm/lib/Target/X86/X86MCInstLower.cpp Removed: diff --git a/llvm/include/llvm/CodeGen/MachineConstantPool.h b/llvm/include/llvm/CodeGen/MachineConstantPool.h index cfc9ca88c976..a9bc0ce300b2 100644 --- a/llvm/include/llvm/CodeGen/MachineConstantPool.h +++ b/llvm/include/llvm/CodeGen/MachineConstantPool.h @@ -41,10 +41,10 @@ class MachineConstantPoolValue { explicit MachineConstantPoolValue(Type *ty) : Ty(ty) {} virtual ~MachineConstantPoolValue() = default; - /// getType - get type of this MachineConstantPoolValue. - /// Type *getType() const { return Ty; } + virtual unsigned getSizeInBytes(const DataLayout &DL) const; + virtual int getExistingMachineCPValue(MachineConstantPool *CP, Align Alignment) = 0; @@ -94,7 +94,7 @@ class MachineConstantPoolEntry { Align getAlign() const { return Alignment; } - Type *getType() const; + unsigned getSizeInBytes(const DataLayout &DL) const; /// This method classifies the entry according to whether or not it may /// generate a relocation entry. 
This must be conservative, so if it might diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 6732c35e2094..85a5d0c59b83 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1970,8 +1970,7 @@ void AsmPrinter::emitConstantPool() { unsigned NewOffset = alignTo(Offset, CPE.getAlign()); OutStreamer->emitZeros(NewOffset - Offset); - Type *Ty = CPE.getType(); - Offset = NewOffset + getDataLayout().getTypeAllocSize(Ty); + Offset = NewOffset + CPE.getSizeInBytes(getDataLayout()); OutStreamer->emitLabel(Sym); if (CPE.isMachineConstantPoolEntry()) diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index 1eb191465ac9..3f44578b1a2c 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -1107,10 +1107,14 @@ Printable llvm::printJumpTableEntryReference(unsigned Idx) { void MachineConstantPoolValue::anchor() {} -Type *MachineConstantPoolEntry::getType() const { +unsigned MachineConstantPoolValue::getSizeInBytes(const DataLayout &DL) const { + return DL.getTypeAllocSize(Ty); +} + +unsigned MachineConstantPoolEntry::getSizeInBytes(const DataLayout &DL) const { if (isMachineConstantPoolEntry()) -return Val.MachineCPVal->getType(); - return Val.ConstVal->getType(); +return Val.MachineCPVal->getSizeInBytes(DL); + return DL.getTypeAllocSize(Val.ConstVal->getType()); } bool MachineConstantPoolEntry::needsRelocation() const { @@ -1123,7 +1127,7 @@ SectionKind MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const { if (needsRelocation()) return SectionKind::getReadOnlyWithRel(); - switch (DL->getTypeAllocSize(getType())) { + switch (getSizeInBytes(*DL)) { case 4: return SectionKind::getMergeableConst4(); case 8: diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp index da7bf6170255..886bc2965969 100644 --- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -514,7 +514,7 @@ ARMConstantIslands::doInitialConstPlacement(std::vector &CPEMIs) const DataLayout &TD = MF->getDataLayout(); for (unsigned i = 0, e = CPs.size(); i != e; ++i) { -unsigned Size = TD.getTypeAllocSize(CPs[i].getType()); +unsigned Size = CPs[i].getSizeInBytes(TD); Align Alignment = CPs[i].getAlign(); // Verify that all constant pool entries are a multiple of their alignment. // If not, we would have to pad them out so that instructions stay aligned. diff --git a/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp b/llvm/lib
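The direction of the change, modelled in a few lines of plain C++ (both *Model classes and the byte counts are invented): the size of a constant-pool entry becomes a virtual query whose default matches the old type-size assumption, so a target can later lump several constants into one entry and report their combined size.

#include <cassert>
#include <cstddef>

struct ConstantPoolValueModel {
  explicit ConstantPoolValueModel(std::size_t TypeSize) : TypeSize(TypeSize) {}
  virtual ~ConstantPoolValueModel() = default;
  // Default behaviour matches the old assumption: one value of the type.
  virtual std::size_t getSizeInBytes() const { return TypeSize; }
protected:
  std::size_t TypeSize;
};

// A hypothetical pooled entry that stores N constants of the same type in a
// single constant-pool slot (the PowerPC TOC use case motivating the patch).
struct PooledConstantsModel : ConstantPoolValueModel {
  PooledConstantsModel(std::size_t TypeSize, std::size_t Count)
      : ConstantPoolValueModel(TypeSize), Count(Count) {}
  std::size_t getSizeInBytes() const override { return TypeSize * Count; }
private:
  std::size_t Count;
};

int main() {
  ConstantPoolValueModel Single(8);
  PooledConstantsModel Lumped(8, 4);
  assert(Single.getSizeInBytes() == 8);   // old assumption still holds here
  assert(Lumped.getSizeInBytes() == 32);  // now expressible via the override
  return 0;
}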
[llvm-branch-commits] [llvm] 48340fb - [NFC] [PowerPC] Update vec_constants test to reflect more patterns
Author: Qiu Chaofan Date: 2021-01-05T11:29:08+08:00 New Revision: 48340fbe6a1a126298c4fe16dcd186d94e485203 URL: https://github.com/llvm/llvm-project/commit/48340fbe6a1a126298c4fe16dcd186d94e485203 DIFF: https://github.com/llvm/llvm-project/commit/48340fbe6a1a126298c4fe16dcd186d94e485203.diff LOG: [NFC] [PowerPC] Update vec_constants test to reflect more patterns This patch uses update_llc_check script to update vec_constants.ll, and add two cases to cover 'vsplti+vsldoi' with 16-bit and 24-bit offset. Added: Modified: llvm/test/CodeGen/PowerPC/vec_constants.ll Removed: diff --git a/llvm/test/CodeGen/PowerPC/vec_constants.ll b/llvm/test/CodeGen/PowerPC/vec_constants.ll index d9257c0b41c5..71f448ee66b2 100644 --- a/llvm/test/CodeGen/PowerPC/vec_constants.ll +++ b/llvm/test/CodeGen/PowerPC/vec_constants.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -O0 -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -O0 -mcpu=pwr7 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s @@ -20,65 +21,83 @@ define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) nounwind { } define <4 x i32> @test_30() nounwind { - ret <4 x i32> < i32 30, i32 30, i32 30, i32 30 > - ; CHECK-LABEL: test_30: -; CHECK: vspltisw -; CHECK-NEXT: vadduwm -; CHECK-NEXT: blr +; CHECK: # %bb.0: +; CHECK-NEXT:vspltisw 2, 15 +; CHECK-NEXT:vadduwm 2, 2, 2 +; CHECK-NEXT:blr + ret <4 x i32> < i32 30, i32 30, i32 30, i32 30 > } define <4 x i32> @test_29() nounwind { - ret <4 x i32> < i32 29, i32 29, i32 29, i32 29 > - ; CHECK-LABEL: test_29: -; CHECK: vspltisw -; CHECK-NEXT: vspltisw -; CHECK-NEXT: vsubuwm -; CHECK-NEXT: blr +; CHECK: # %bb.0: +; CHECK-NEXT:vspltisw 3, -16 +; CHECK-NEXT:vspltisw 2, 13 +; CHECK-NEXT:vsubuwm 2, 2, 3 +; CHECK-NEXT:blr + ret <4 x i32> < i32 29, i32 29, i32 29, i32 29 > } define <8 x i16> @test_n30() nounwind { - ret <8 x i16> < i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30 > - ; CHECK-LABEL: test_n30: -; CHECK: vspltish -; CHECK-NEXT: vadduhm -; CHECK-NEXT: blr +; CHECK: # %bb.0: +; CHECK-NEXT:vspltish 2, -15 +; CHECK-NEXT:vadduhm 2, 2, 2 +; CHECK-NEXT:blr + ret <8 x i16> < i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30 > } define <16 x i8> @test_n104() nounwind { - ret <16 x i8> < i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104 > - ; CHECK-LABEL: test_n104: -; CHECK: vspltisb -; CHECK-NEXT: vslb -; CHECK-NEXT: blr +; CHECK: # %bb.0: +; CHECK-NEXT:vspltisb 2, -13 +; CHECK-NEXT:vslb 2, 2, 2 +; CHECK-NEXT:blr + ret <16 x i8> < i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104 > } define <4 x i32> @test_vsldoi() nounwind { - ret <4 x i32> < i32 512, i32 512, i32 512, i32 512 > - ; CHECK-LABEL: test_vsldoi: -; CHECK: vspltisw -; CHECK-NEXT: vsldoi -; CHECK-NEXT: blr +; CHECK: # %bb.0: +; CHECK-NEXT:vspltisw 2, 2 +; CHECK-NEXT:vsldoi 2, 2, 2, 1 +; CHECK-NEXT:blr + ret <4 x i32> < i32 512, i32 512, i32 512, i32 512 > } define <8 x i16> @test_vsldoi_65023() nounwind { +; CHECK-LABEL: test_vsldoi_65023: +; CHECK: # %bb.0: +; CHECK-NEXT:vspltish 2, -3 +; CHECK-NEXT:vsldoi 2, 2, 2, 1 +; CHECK-NEXT:blr ret <8 x i16> < i16 65023, i16 65023,i16 65023,i16 65023,i16 65023,i16 65023,i16 65023,i16 65023 > +} -; CHECK-LABEL: test_vsldoi_65023: -; 
CHECK: vspltish -; CHECK-NEXT: vsldoi -; CHECK-NEXT: blr +define <4 x i32> @test_vsldoi_x16() nounwind { +; CHECK-LABEL: test_vsldoi_x16: +; CHECK: # %bb.0: +; CHECK-NEXT:vspltisw 2, -3 +; CHECK-NEXT:vsldoi 2, 2, 2, 2 +; CHECK-NEXT:blr + ret <4 x i32> } -define <4 x i32> @test_rol() nounwind { - ret <4 x i32> < i32 -11534337, i32 -11534337, i32 -11534337, i32 -11534337 > +define <4 x i32> @test_vsldoi_x24() nounwind { +; CHECK-LABEL: test_vsldoi_x24: +; CHECK: # %bb.0: +; CHECK-NEXT:vspltisw 2, -3 +; CHECK-NEXT:vsldoi 2, 2, 2, 3 +; CHECK-NEXT:blr + ret <4 x i32> +} +define <4 x i32> @test_rol() nounwind { ; CHECK-LABEL: test_rol: -; CHECK: vspltisw -; CHECK-NEXT: vrlw -; CHECK-NEXT: blr +; CHECK: # %bb.0: +; CHECK-NEXT:vspltisw 2, -12 +; CHECK-NEXT:vrlw 2, 2, 2 +; CHECK-NEXT:blr + ret <4 x i32> < i32 -11534337, i32 -11534337, i32 -11534337, i32 -11534337 > } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/c
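The NOTE line added at the top of the test records how the CHECK lines were produced. For reference, a typical way to regenerate them (the build-directory path is a placeholder) is to put a freshly built llc on PATH and rerun the script on the test file:

  $ PATH=/path/to/llvm-build/bin:$PATH \
      llvm/utils/update_llc_test_checks.py llvm/test/CodeGen/PowerPC/vec_constants.ll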
[llvm-branch-commits] [llvm] b6c8feb - [NFC] [PowerPC] Remove dead code in BUILD_VECTOR peephole
Author: Qiu Chaofan Date: 2021-01-05T11:35:00+08:00 New Revision: b6c8feb29fce39121884f7e08ec6eb0f58da3fb7 URL: https://github.com/llvm/llvm-project/commit/b6c8feb29fce39121884f7e08ec6eb0f58da3fb7 DIFF: https://github.com/llvm/llvm-project/commit/b6c8feb29fce39121884f7e08ec6eb0f58da3fb7.diff LOG: [NFC] [PowerPC] Remove dead code in BUILD_VECTOR peephole This piece of code tries to use splat+shift to lower a build_vector with a repeating bit pattern. Since the immediate field of a vector splat is only 5 bits (-16~15), it iterates over the candidates one by one to find one that shifts/rotates to the number in the build_vector. This patch removes the code that tries to match the constant with an algebraic right-shift, because that is meaningless: no negative number's algebraic right-shift produces a result smaller than itself. Besides, the code (int)((unsigned)i >> j) means a logical shift-right in C. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D93937 Added: Modified: llvm/lib/Target/PowerPC/PPCISelLowering.cpp Removed: diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index e951679f92fa..1b1e9e019476 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -9555,17 +9555,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); } -// vsplti + sra self. -if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { - SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl); - static const unsigned IIDs[] = { // Intrinsic to use for each size. -Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0, -Intrinsic::ppc_altivec_vsraw - }; - Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); - return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); -} - // vsplti + rol self. if (SextVal == (int)(((unsigned)i << TypeShiftAmt) | ((unsigned)i >> (SplatBitSize-TypeShiftAmt { ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
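As a small standalone illustration of the reasoning above (plain C++, not LLVM code): the cast through unsigned makes the shift logical, while a genuine algebraic (arithmetic) right-shift of a negative value moves it toward zero and so can never be smaller than the original.

  #include <cstdio>

  int main() {
    int i = -13;     // within the 5-bit splat immediate range (-16..15)
    unsigned j = 2;

    // What the removed check computed: the operand is converted to
    // unsigned first, so this is a logical shift right.
    int Logical = (int)((unsigned)i >> j);   // 1073741820, nowhere near i >> j

    // An actual arithmetic right shift keeps the sign bit; for signed i this
    // is implementation-defined before C++20 but arithmetic on common targets.
    int Arithmetic = i >> j;                 // -4, which is >= -13

    printf("logical=%d arithmetic=%d\n", Logical, Arithmetic);
    return 0;
  }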
[llvm-branch-commits] [llvm] 3e2b424 - Remove RefSCC::handleTrivialEdgeInsertion
Author: Xun Li Date: 2021-01-04T20:21:01-08:00 New Revision: 3e2b42489f897ededae1d3269dcaf084da692111 URL: https://github.com/llvm/llvm-project/commit/3e2b42489f897ededae1d3269dcaf084da692111 DIFF: https://github.com/llvm/llvm-project/commit/3e2b42489f897ededae1d3269dcaf084da692111.diff LOG: Remove RefSCC::handleTrivialEdgeInsertion This function no longer does anything useful. It probably did something originally, but later changes removed that functionality and didn't clean up this function. The checks are already done in the callers as well. Differential Revision: https://reviews.llvm.org/D94055 Added: Modified: llvm/include/llvm/Analysis/LazyCallGraph.h llvm/lib/Analysis/LazyCallGraph.cpp Removed: diff --git a/llvm/include/llvm/Analysis/LazyCallGraph.h b/llvm/include/llvm/Analysis/LazyCallGraph.h index 7478e1726366..e92134d074e5 100644 --- a/llvm/include/llvm/Analysis/LazyCallGraph.h +++ b/llvm/include/llvm/Analysis/LazyCallGraph.h @@ -598,10 +598,6 @@ class LazyCallGraph { void verify(); #endif -/// Handle any necessary parent set updates after inserting a trivial ref -/// or call edge. -void handleTrivialEdgeInsertion(Node &SourceN, Node &TargetN); - public: using iterator = pointee_iterator::const_iterator>; using range = iterator_range; diff --git a/llvm/lib/Analysis/LazyCallGraph.cpp b/llvm/lib/Analysis/LazyCallGraph.cpp index 26529891bc03..ab6946ee96be 100644 --- a/llvm/lib/Analysis/LazyCallGraph.cpp +++ b/llvm/lib/Analysis/LazyCallGraph.cpp @@ -1373,23 +1373,6 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, return Result; } -void LazyCallGraph::RefSCC::handleTrivialEdgeInsertion(Node &SourceN, - Node &TargetN) { - // The only trivial case that requires any graph updates is when we add new - // ref edge and may connect different RefSCCs along that path. This is only - // because of the parents set. Every other part of the graph remains constant - // after this edge insertion. - assert(G->lookupRefSCC(SourceN) == this && "Source must be in this RefSCC."); - RefSCC &TargetRC = *G->lookupRefSCC(TargetN); - if (&TargetRC == this) -return; - -#ifdef EXPENSIVE_CHECKS - assert(TargetRC.isDescendantOf(*this) && - "Target must be a descendant of the Source."); -#endif -} - void LazyCallGraph::RefSCC::insertTrivialCallEdge(Node &SourceN, Node &TargetN) { #ifndef NDEBUG @@ -1420,9 +1403,6 @@ void LazyCallGraph::RefSCC::insertTrivialCallEdge(Node &SourceN, // Create the new edge. SourceN->Edges.emplace_back(TargetN, Edge::Call); } - - // Now that we have the edge, handle the graph fallout. - handleTrivialEdgeInsertion(SourceN, TargetN); } void LazyCallGraph::RefSCC::insertTrivialRefEdge(Node &SourceN, Node &TargetN) { @@ -1449,9 +1429,6 @@ void LazyCallGraph::RefSCC::insertTrivialRefEdge(Node &SourceN, Node &TargetN) { // Create the new edge. SourceN->Edges.emplace_back(TargetN, Edge::Ref); - - // Now that we have the edge, handle the graph fallout. - handleTrivialEdgeInsertion(SourceN, TargetN); } void LazyCallGraph::RefSCC::replaceNodeFunction(Node &N, Function &NewF) { ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 854b861 - [llvm/Orc] Fix ExecutionEngine module build breakage
Author: Med Ismail Bennani Date: 2021-01-05T05:33:44+01:00 New Revision: 854b861881a28fda6dd9601cc07b54822ce5d00d URL: https://github.com/llvm/llvm-project/commit/854b861881a28fda6dd9601cc07b54822ce5d00d DIFF: https://github.com/llvm/llvm-project/commit/854b861881a28fda6dd9601cc07b54822ce5d00d.diff LOG: [llvm/Orc] Fix ExecutionEngine module build breakage This patch updates the llvm module map to reflect changes made in `5efc71e119d4eba235209d262e7d171361a0b9be` and fixes the module builds (`-DLLVM_ENABLE_MODULES=On`). Differential Revision: https://reviews.llvm.org/D94057 Signed-off-by: Med Ismail Bennani Added: Modified: llvm/include/llvm/module.modulemap Removed: diff --git a/llvm/include/llvm/module.modulemap b/llvm/include/llvm/module.modulemap index 0fd63b00fd0d..51a4a3d5eb3a 100644 --- a/llvm/include/llvm/module.modulemap +++ b/llvm/include/llvm/module.modulemap @@ -211,9 +211,9 @@ module LLVM_OrcSupport { requires cplusplus header "ExecutionEngine/Orc/OrcError.h" - header "ExecutionEngine/Orc/RPC/RPCUtils.h" - header "ExecutionEngine/Orc/RPC/RPCSerialization.h" - header "ExecutionEngine/Orc/RPC/RawByteChannel.h" + header "ExecutionEngine/Orc/Shared/RPCUtils.h" + header "ExecutionEngine/Orc/Shared/Serialization.h" + header "ExecutionEngine/Orc/Shared/RawByteChannel.h" export * } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
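For anyone wanting to reproduce or verify the fix, a modules-enabled build can be configured roughly as follows (generator, build directory, and compiler paths are placeholders; a Clang host compiler is needed for module builds):

  $ cmake -G Ninja -S llvm -B build-modules \
      -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \
      -DLLVM_ENABLE_MODULES=On
  $ ninja -C build-modules

Such a build breaks when the LLVM_OrcSupport module still lists the old RPC/ header paths, since those headers were moved under Shared/ by the referenced commit.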
[llvm-branch-commits] [compiler-rt] 979c38c - [compiler-rt] [windows] Add UNUSED attributes on variables/functions only used for 64 bit targets
Author: Martin Storsjö Date: 2021-01-05T08:59:08+02:00 New Revision: 979c38cc74f4e96c5e1bee1f966a81038fff1ed6 URL: https://github.com/llvm/llvm-project/commit/979c38cc74f4e96c5e1bee1f966a81038fff1ed6 DIFF: https://github.com/llvm/llvm-project/commit/979c38cc74f4e96c5e1bee1f966a81038fff1ed6.diff LOG: [compiler-rt] [windows] Add UNUSED attributes on variables/functions only used for 64 bit targets This fixes warnings when building for 32 bit targets. Differential Revision: https://reviews.llvm.org/D91852 Added: Modified: compiler-rt/lib/interception/interception_win.cpp Removed: diff --git a/compiler-rt/lib/interception/interception_win.cpp b/compiler-rt/lib/interception/interception_win.cpp index 1a1c327e6124..98bc756ae53a 100644 --- a/compiler-rt/lib/interception/interception_win.cpp +++ b/compiler-rt/lib/interception/interception_win.cpp @@ -136,7 +136,7 @@ namespace __interception { static const int kAddressLength = FIRST_32_SECOND_64(4, 8); static const int kJumpInstructionLength = 5; static const int kShortJumpInstructionLength = 2; -static const int kIndirectJumpInstructionLength = 6; +UNUSED static const int kIndirectJumpInstructionLength = 6; static const int kBranchLength = FIRST_32_SECOND_64(kJumpInstructionLength, kIndirectJumpInstructionLength); static const int kDirectBranchLength = kBranchLength + kAddressLength; @@ -165,7 +165,7 @@ static uptr GetMmapGranularity() { return si.dwAllocationGranularity; } -static uptr RoundUpTo(uptr size, uptr boundary) { +UNUSED static uptr RoundUpTo(uptr size, uptr boundary) { return (size + boundary - 1) & ~(boundary - 1); } @@ -309,7 +309,7 @@ struct TrampolineMemoryRegion { uptr max_size; }; -static const uptr kTrampolineScanLimitRange = 1 << 31; // 2 gig +UNUSED static const uptr kTrampolineScanLimitRange = 1 << 31; // 2 gig static const int kMaxTrampolineRegion = 1024; static TrampolineMemoryRegion TrampolineRegions[kMaxTrampolineRegion]; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
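A minimal standalone sketch of the pattern being fixed; the macros here are simplified stand-ins for the real definitions in compiler-rt's interception/sanitizer headers, and UNUSED is assumed to expand to __attribute__((unused)) on non-MSVC compilers:

  #include <cstdio>

  // Simplified stand-in for compiler-rt's word-size dispatch macro.
  #if defined(__LP64__) || defined(_WIN64)
  #  define FIRST_32_SECOND_64(a, b) (b)
  #else
  #  define FIRST_32_SECOND_64(a, b) (a)
  #endif

  // Assumed expansion of compiler-rt's UNUSED macro.
  #define UNUSED __attribute__((unused))

  static const int kJumpInstructionLength = 5;

  // Referenced only when FIRST_32_SECOND_64 picks its second argument, i.e.
  // on 64-bit targets; without UNUSED a 32-bit build may warn about it
  // (e.g. -Wunused-const-variable).
  UNUSED static const int kIndirectJumpInstructionLength = 6;

  static const int kBranchLength =
      FIRST_32_SECOND_64(kJumpInstructionLength, kIndirectJumpInstructionLength);

  int main() {
    printf("jump=%d branch=%d\n", kJumpInstructionLength, kBranchLength);
    return 0;
  }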