[llvm-branch-commits] [InstrProfiling] Do not sanitize PGO instrumentation (PR #86775)
https://github.com/vitalybuka converted_to_draft https://github.com/llvm/llvm-project/pull/86775 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][IR] Add SetNoSanitize helpers (PR #86772)
https://github.com/vitalybuka edited https://github.com/llvm/llvm-project/pull/86772 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [InstrProfiling] Do not sanitize PGO instrumentation (PR #86775)
llvmbot wrote: @llvm/pr-subscribers-pgo Author: Vitaly Buka (vitalybuka) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/86775.diff 1 Files Affected: - (modified) llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp (+32-13) ``diff diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index c42c53edd51190..7cb0b29bff2ce2 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -188,6 +188,17 @@ static bool profDataReferencedByCode(const Module &M) { return enablesValueProfiling(M); } +class InstrIRBuilder : public IRBuilder<> { +public: + explicit InstrIRBuilder(Instruction *IP) : IRBuilder<>(IP) { +SetNoSanitizeMetadata(); + } + + explicit InstrIRBuilder(BasicBlock *BB) : IRBuilder<>(BB) { +SetNoSanitizeMetadata(); + } +}; + class InstrLowerer final { public: InstrLowerer(Module &M, const InstrProfOptions &Options, @@ -370,7 +381,7 @@ class PGOCounterPromoterHelper : public LoadAndStorePromoter { Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); Value *Addr = cast(Store)->getPointerOperand(); Type *Ty = LiveInValue->getType(); - IRBuilder<> Builder(InsertPos); + InstrIRBuilder Builder(InsertPos); if (auto *AddrInst = dyn_cast_or_null(Addr)) { // If isRuntimeCounterRelocationEnabled() is true then the address of // the store instruction is computed with two instructions in @@ -842,7 +853,7 @@ void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { for (uint32_t Kind = IPVK_First; Kind < ValueKind; ++Kind) Index += It->second.NumValueSites[Kind]; - IRBuilder<> Builder(Ind); + InstrIRBuilder Builder(Ind); bool IsMemOpSize = (Ind->getValueKind()->getZExtValue() == llvm::InstrProfValueKind::IPVK_MemOPSize); CallInst *Call = nullptr; @@ -872,7 +883,7 @@ void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { Value 
*InstrLowerer::getCounterAddress(InstrProfCntrInstBase *I) { auto *Counters = getOrCreateRegionCounters(I); - IRBuilder<> Builder(I); + InstrIRBuilder Builder(I); if (isa(I)) Counters->setAlignment(Align(8)); @@ -887,7 +898,7 @@ Value *InstrLowerer::getCounterAddress(InstrProfCntrInstBase *I) { Function *Fn = I->getParent()->getParent(); LoadInst *&BiasLI = FunctionToProfileBiasMap[Fn]; if (!BiasLI) { -IRBuilder<> EntryBuilder(&Fn->getEntryBlock().front()); +InstrIRBuilder EntryBuilder(&Fn->getEntryBlock().front()); auto *Bias = M.getGlobalVariable(getInstrProfCounterBiasVarName()); if (!Bias) { // Compiler must define this variable when runtime counter relocation @@ -897,6 +908,7 @@ Value *InstrLowerer::getCounterAddress(InstrProfCntrInstBase *I) { M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage, Constant::getNullValue(Int64Ty), getInstrProfCounterBiasVarName()); Bias->setVisibility(GlobalVariable::HiddenVisibility); + Bias->setNoSanitizeMetadata(); // A definition that's weak (linkonce_odr) without being in a COMDAT // section wouldn't lead to link errors, but it would lead to a dead // data word from every TU but one. Putting it in COMDAT ensures there @@ -912,7 +924,7 @@ Value *InstrLowerer::getCounterAddress(InstrProfCntrInstBase *I) { Value *InstrLowerer::getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I) { auto *Bitmaps = getOrCreateRegionBitmaps(I); - IRBuilder<> Builder(I); + InstrIRBuilder Builder(I); auto *Addr = Builder.CreateConstInBoundsGEP2_32( Bitmaps->getValueType(), Bitmaps, 0, I->getBitmapIndex()->getZExtValue()); @@ -931,7 +943,7 @@ Value *InstrLowerer::getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I) { void InstrLowerer::lowerCover(InstrProfCoverInst *CoverInstruction) { auto *Addr = getCounterAddress(CoverInstruction); - IRBuilder<> Builder(CoverInstruction); + InstrIRBuilder Builder(CoverInstruction); // We store zero to represent that this block is covered. 
Builder.CreateStore(Builder.getInt8(0), Addr); CoverInstruction->eraseFromParent(); @@ -943,7 +955,7 @@ void InstrLowerer::lowerTimestamp( "timestamp probes are always the first probe for a function"); auto &Ctx = M.getContext(); auto *TimestampAddr = getCounterAddress(TimestampInstruction); - IRBuilder<> Builder(TimestampInstruction); + InstrIRBuilder Builder(TimestampInstruction); auto *CalleeTy = FunctionType::get(Type::getVoidTy(Ctx), TimestampAddr->getType(), false); auto Callee = M.getOrInsertFunction( @@ -955,7 +967,7 @@ void InstrLowerer::lowerTimestamp( void InstrLowerer::lowerIncrement(InstrProfIncrementInst *Inc) { auto *Addr = getCounterAddress(Inc); - IRBuilder<> Builder(Inc); + InstrIRBuilder Builder(Inc); if (Options.Atomic || AtomicCounterUpdateAll || (Inc->
[llvm-branch-commits] [InstrProfiling] Do not sanitize PGO instrumentation (PR #86775)
https://github.com/vitalybuka created https://github.com/llvm/llvm-project/pull/86775 None ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [HWASAN] Don't instrument loads from global if globals are not tagged (PR #86774)
https://github.com/vitalybuka updated https://github.com/llvm/llvm-project/pull/86774 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][IR] Add SetNoSanitize helpers (PR #86772)
https://github.com/vitalybuka updated https://github.com/llvm/llvm-project/pull/86772 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][HWASAN] Promote InstrumentGlobals to member (PR #86773)
https://github.com/vitalybuka updated https://github.com/llvm/llvm-project/pull/86773 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][HWASAN] Promote InstrumentGlobals to member (PR #86773)
https://github.com/vitalybuka updated https://github.com/llvm/llvm-project/pull/86773 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][IR] Add SetNoSanitize helpers (PR #86772)
https://github.com/vitalybuka updated https://github.com/llvm/llvm-project/pull/86772 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [HWASAN] Don't instrument loads from global if globals are not tagged (PR #86774)
llvmbot wrote: @llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-compiler-rt-sanitizer Author: Vitaly Buka (vitalybuka) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/86774.diff 3 Files Affected: - (modified) llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp (+8) - (modified) llvm/test/Instrumentation/HWAddressSanitizer/globals-access.ll (-16) - (modified) llvm/test/Instrumentation/HWAddressSanitizer/use-after-scope-setjmp.ll (-1) ``diff diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index 96fd530be33318..f89a22d951a9f7 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -790,6 +790,14 @@ bool HWAddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) { if (SSI && SSI->stackAccessIsSafe(*Inst)) return true; } + + GlobalVariable *G = dyn_cast(getUnderlyingObject(Ptr)); + if (G) { +if (!InstrumentGlobals) + return true; +// TODO: Optimize inbound global accesses, like Asan `instrumentMop`. 
+ } + return false; } diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/globals-access.ll b/llvm/test/Instrumentation/HWAddressSanitizer/globals-access.ll index e59701253d8bc1..11ac88c40c2ed6 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/globals-access.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/globals-access.ll @@ -13,22 +13,6 @@ define dso_local noundef i32 @_Z3tmpv() sanitize_hwaddress { ; OFF-LABEL: define dso_local noundef i32 @_Z3tmpv( ; OFF-SAME: ) #[[ATTR0:[0-9]+]] { ; OFF-NEXT: entry: -; OFF-NEXT:[[TMP12:%.*]] = load i64, ptr @__hwasan_tls, align 8 -; OFF-NEXT:[[TMP1:%.*]] = or i64 [[TMP12]], 4294967295 -; OFF-NEXT:[[HWASAN_SHADOW:%.*]] = add i64 [[TMP1]], 1 -; OFF-NEXT:[[TMP2:%.*]] = inttoptr i64 [[HWASAN_SHADOW]] to ptr -; OFF-NEXT:[[TMP3:%.*]] = lshr i64 ptrtoint (ptr @x to i64), 56 -; OFF-NEXT:[[TMP4:%.*]] = trunc i64 [[TMP3]] to i8 -; OFF-NEXT:[[TMP5:%.*]] = and i64 ptrtoint (ptr @x to i64), 72057594037927935 -; OFF-NEXT:[[TMP6:%.*]] = lshr i64 [[TMP5]], 4 -; OFF-NEXT:[[TMP7:%.*]] = getelementptr i8, ptr [[TMP2]], i64 [[TMP6]] -; OFF-NEXT:[[TMP8:%.*]] = load i8, ptr [[TMP7]], align 1 -; OFF-NEXT:[[TMP9:%.*]] = icmp ne i8 [[TMP4]], [[TMP8]] -; OFF-NEXT:br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1:![0-9]+]] -; OFF: 10: -; OFF-NEXT:call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[TMP2]], ptr @x, i32 2) -; OFF-NEXT:br label [[TMP11]] -; OFF: 11: ; OFF-NEXT:[[TMP0:%.*]] = load i32, ptr @x, align 4 ; OFF-NEXT:ret i32 [[TMP0]] ; diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/use-after-scope-setjmp.ll b/llvm/test/Instrumentation/HWAddressSanitizer/use-after-scope-setjmp.ll index 079d7224128301..62fd7a16715693 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/use-after-scope-setjmp.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/use-after-scope-setjmp.ll @@ -54,7 +54,6 @@ define dso_local noundef i1 @_Z6targetv() sanitize_hwaddress { ; CHECK: sw.bb1: ; CHECK-NEXT:br 
label [[RETURN]] ; CHECK: while.body: -; CHECK-NEXT:call void @llvm.hwasan.check.memaccess(ptr [[TMP16]], ptr @stackbuf, i32 19) ; CHECK-NEXT:store ptr [[BUF_HWASAN]], ptr @stackbuf, align 8 ; CHECK-NEXT:call void @may_jump() ; CHECK-NEXT:br label [[RETURN]] `` https://github.com/llvm/llvm-project/pull/86774 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][HWASAN] Promote InstrumentGlobals to member (PR #86773)
llvmbot wrote: @llvm/pr-subscribers-compiler-rt-sanitizer Author: Vitaly Buka (vitalybuka) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/86773.diff 1 Files Affected: - (modified) llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp (+5-2) ``diff diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index 5d366e3d6dee0a..96fd530be33318 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -422,6 +422,7 @@ class HWAddressSanitizer { bool InstrumentLandingPads; bool InstrumentWithCalls; bool InstrumentStack; + bool InstrumentGlobals; bool DetectUseAfterScope; bool UsePageAliases; bool UseMatchAllCallback; @@ -639,11 +640,13 @@ void HWAddressSanitizer::initializeModule() { // If we don't have personality function support, fall back to landing pads. InstrumentLandingPads = optOr(ClInstrumentLandingPads, !NewRuntime); + InstrumentGlobals = + !CompileKernel && !UsePageAliases && optOr(ClGlobals, NewRuntime); + if (!CompileKernel) { createHwasanCtorComdat(); -bool InstrumentGlobals = optOr(ClGlobals, NewRuntime); -if (InstrumentGlobals && !UsePageAliases) +if (InstrumentGlobals) instrumentGlobals(); bool InstrumentPersonalityFunctions = `` https://github.com/llvm/llvm-project/pull/86773 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][IR] Add SetNoSanitize helpers (PR #86772)
llvmbot wrote: @llvm/pr-subscribers-llvm-ir Author: Vitaly Buka (vitalybuka) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/86772.diff 3 Files Affected: - (modified) llvm/include/llvm/IR/GlobalValue.h (+1) - (modified) llvm/include/llvm/IR/IRBuilder.h (+6) - (modified) llvm/lib/IR/Globals.cpp (+7) ``diff diff --git a/llvm/include/llvm/IR/GlobalValue.h b/llvm/include/llvm/IR/GlobalValue.h index aa8188cd99fee1..c61d502aa332b0 100644 --- a/llvm/include/llvm/IR/GlobalValue.h +++ b/llvm/include/llvm/IR/GlobalValue.h @@ -360,6 +360,7 @@ class GlobalValue : public Constant { // storage is shared between `G1` and `G2`. void setSanitizerMetadata(SanitizerMetadata Meta); void removeSanitizerMetadata(); + void setNoSanitizeMetadata(); bool isTagged() const { return hasSanitizerMetadata() && getSanitizerMetadata().Memtag; diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index a6165ef13fd790..2a0c1e9e8c446b 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -221,6 +221,12 @@ class IRBuilderBase { AddOrRemoveMetadataToCopy(LLVMContext::MD_dbg, L.getAsMDNode()); } + /// Set nosanitize metadata. + void SetNoSanitizeMetadata() { +AddOrRemoveMetadataToCopy(llvm::LLVMContext::MD_nosanitize, + llvm::MDNode::get(getContext(), std::nullopt)); + } + /// Collect metadata with IDs \p MetadataKinds from \p Src which should be /// added to all created instructions. Entries present in MedataDataToCopy but /// not on \p Src will be dropped from MetadataToCopy. 
diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp index 481a1d802e66b6..40f854a2c90635 100644 --- a/llvm/lib/IR/Globals.cpp +++ b/llvm/lib/IR/Globals.cpp @@ -243,6 +243,13 @@ void GlobalValue::removeSanitizerMetadata() { HasSanitizerMetadata = false; } +void GlobalValue::setNoSanitizeMetadata() { + SanitizerMetadata Meta; + Meta.NoAddress = true; + Meta.NoHWAddress = true; + setSanitizerMetadata(Meta); +} + StringRef GlobalObject::getSectionImpl() const { assert(hasSection()); return getContext().pImpl->GlobalObjectSections[this]; `` https://github.com/llvm/llvm-project/pull/86772 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [HWASAN] Don't instrument loads from global if globals are not tagged (PR #86774)
https://github.com/vitalybuka created https://github.com/llvm/llvm-project/pull/86774 None ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][HWASAN] Promote InstrumentGlobals to member (PR #86773)
https://github.com/vitalybuka created https://github.com/llvm/llvm-project/pull/86773 None ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][IR] Add SetNoSanitize helpers (PR #86772)
https://github.com/vitalybuka created https://github.com/llvm/llvm-project/pull/86772 None ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/18.x [X86_64] fix SSE type error in vaarg (PR #86698)
AtariDreams wrote: > Is there some reason you think we should take this specific patch, out of all > the x86 ABI fixes going in recently? It isn't a regression, as far as I > know. It's a miscompile https://github.com/llvm/llvm-project/pull/86698 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Set EntryDiscriminator in YAML profile for indirect calls (PR #82128)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/82128 >From f46d68517b6d0804f56a087c86c20c9bc3a08a0f Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Tue, 26 Mar 2024 14:23:09 -0700 Subject: [PATCH 1/2] Update test Created using spr 1.3.4 --- bolt/test/X86/yaml-indirect-call-discriminator.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bolt/test/X86/yaml-indirect-call-discriminator.s b/bolt/test/X86/yaml-indirect-call-discriminator.s index 172eba6dc31c07..c7678b3811834d 100644 --- a/bolt/test/X86/yaml-indirect-call-discriminator.s +++ b/bolt/test/X86/yaml-indirect-call-discriminator.s @@ -44,7 +44,7 @@ main: testq %rax, %rax jne Lindcall Lcall: - callsecondary_entry + callfunc # FDATA: 1 main #Lcall# 1 func 0 1 1 Lindcall: callq *%rax >From 70c242d63b6aa862eba2bb5b03ec1eb6602e0f31 Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Tue, 26 Mar 2024 18:30:40 -0700 Subject: [PATCH 2/2] Align secondary entry enumeration in getSymbolForEntryID with getEntryIDForSymbol Created using spr 1.3.4 --- bolt/lib/Core/BinaryFunction.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 32bf822f077ade..c9e037c225dd41 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -3547,7 +3547,7 @@ MCSymbol *BinaryFunction::getSymbolForEntryID(uint64_t EntryID) { if (!isMultiEntry()) return nullptr; - uint64_t NumEntries = 0; + uint64_t NumEntries = 1; if (hasCFG()) { for (BinaryBasicBlock *BB : BasicBlocks) { MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/18.x [SLP]Fix a crash if the argument of call was affected by minbitwidth analysis (PR #86731)
https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/86731 >From 737f0ddcc34b407e0c910375fc9dcc095d51914c Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 21 Mar 2024 17:05:50 -0700 Subject: [PATCH] [SLP]Fix a crash if the argument of call was affected by minbitwidth analysis. Need to support proper type conversion for function arguments to avoid compiler crash. --- .../Transforms/Vectorize/SLPVectorizer.cpp| 21 - .../X86/call-arg-reduced-by-minbitwidth.ll| 82 +++ 2 files changed, 102 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/call-arg-reduced-by-minbitwidth.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 0a9e2c7f49f55f..1fbd69e38eaeec 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -11653,12 +11653,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, -1)) TysForDecl.push_back( FixedVectorType::get(CI->getType(), E->Scalars.size())); + auto *CEI = cast(VL0); for (unsigned I : seq(0, CI->arg_size())) { ValueList OpVL; // Some intrinsics have scalar arguments. This argument should not be // vectorized. 
if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(ID, I)) { - CallInst *CEI = cast(VL0); ScalarArg = CEI->getArgOperand(I); OpVecs.push_back(CEI->getArgOperand(I)); if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) @@ -11671,6 +11671,25 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); return E->VectorizedValue; } +auto GetOperandSignedness = [&](unsigned Idx) { + const TreeEntry *OpE = getOperandEntry(E, Idx); + bool IsSigned = false; + auto It = MinBWs.find(OpE); + if (It != MinBWs.end()) +IsSigned = It->second.second; + else +IsSigned = any_of(OpE->Scalars, [&](Value *R) { + return !isKnownNonNegative(R, SimplifyQuery(*DL)); +}); + return IsSigned; +}; +ScalarArg = CEI->getArgOperand(I); +if (cast(OpVec->getType())->getElementType() != +ScalarArg->getType()) { + auto *CastTy = FixedVectorType::get(ScalarArg->getType(), + VecTy->getNumElements()); + OpVec = Builder.CreateIntCast(OpVec, CastTy, GetOperandSignedness(I)); +} LLVM_DEBUG(dbgs() << "SLP: OpVec[" << I << "]: " << *OpVec << "\n"); OpVecs.push_back(OpVec); if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/call-arg-reduced-by-minbitwidth.ll b/llvm/test/Transforms/SLPVectorizer/X86/call-arg-reduced-by-minbitwidth.ll new file mode 100644 index 00..49e89feb475b95 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/call-arg-reduced-by-minbitwidth.ll @@ -0,0 +1,82 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-pc-windows-msvc19.34.0 < %s | FileCheck %s + +define void @test(ptr %0, i8 %1, i1 %cmp12.i) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[CMP12_I:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT:[[TMP2:%.*]] = insertelement <8 x i1> poison, i1 [[CMP12_I]], i32 0 +; CHECK-NEXT:[[TMP3:%.*]] = 
shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <8 x i32> zeroinitializer +; CHECK-NEXT:[[TMP4:%.*]] = insertelement <8 x i8> poison, i8 [[TMP1]], i32 0 +; CHECK-NEXT:[[TMP5:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> poison, <8 x i32> zeroinitializer +; CHECK-NEXT:br label [[PRE:%.*]] +; CHECK: pre: +; CHECK-NEXT:[[TMP6:%.*]] = zext <8 x i8> [[TMP5]] to <8 x i32> +; CHECK-NEXT:[[TMP7:%.*]] = call <8 x i32> @llvm.umax.v8i32(<8 x i32> [[TMP6]], <8 x i32> ) +; CHECK-NEXT:[[TMP8:%.*]] = add <8 x i32> [[TMP7]], +; CHECK-NEXT:[[TMP9:%.*]] = select <8 x i1> [[TMP3]], <8 x i32> [[TMP8]], <8 x i32> [[TMP6]] +; CHECK-NEXT:[[TMP10:%.*]] = trunc <8 x i32> [[TMP9]] to <8 x i8> +; CHECK-NEXT:store <8 x i8> [[TMP10]], ptr [[TMP0]], align 1 +; CHECK-NEXT:br label [[PRE]] +; +entry: + %idx11 = getelementptr i8, ptr %0, i64 1 + %idx22 = getelementptr i8, ptr %0, i64 2 + %idx33 = getelementptr i8, ptr %0, i64 3 + %idx44 = getelementptr i8, ptr %0, i64 4 + %idx55 = getelementptr i8, ptr %0, i64 5 + %idx66 = getelementptr i8, ptr %0, i64 6 + %idx77 = getelementptr i8, ptr %0, i64 7 + br label %pre + +pre: + %conv.i = zext i8 %1 to i32 + %2 = tail cal
[llvm-branch-commits] [llvm] release/18.x: [Mips] Restore wrong deletion of instruction 'and' in unsigned min/max processing. (#85902) (PR #86424)
https://github.com/topperc approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/86424 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Inline][PGO] After inline, update profile for invoke instruction in both cloned instruction in the caller and original callee (PR #83809)
minglotus-6 wrote: > The invoke instruction can have 3 different kinds of prof data > > 1. call count (if a direct call) > 2. VP profile data (if an indirect call) > 3. branch weights for landing pad. > 4. can coexist with 2) and does not need to be updated. Is there an existing > test coverage for type 1) update? Added test cases for 1 and 2 in the pre-commit [PR](https://github.com/llvm/llvm-project/pull/83780) so the diff in this one is clearer. As discussed offline, non-count prof data (i.e., representing taken or not taken branches, associated with `br` or `switchinst`, etc) doesn't need scaling so no extra work needed. PTAL. https://github.com/llvm/llvm-project/pull/83809 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Inline][PGO] After inline, update profile for invoke instruction in both cloned instruction in the caller and original callee (PR #83809)
https://github.com/minglotus-6 updated https://github.com/llvm/llvm-project/pull/83809 >From 9575b83ea40012ecbfbf301a24ec89de0726ffd4 Mon Sep 17 00:00:00 2001 From: mingmingl Date: Mon, 4 Mar 2024 00:43:55 -0800 Subject: [PATCH] update profile for invoke instruction in caller and callee after inline --- llvm/include/llvm/IR/Instructions.h | 3 + llvm/lib/IR/Instructions.cpp | 12 ++ llvm/lib/Transforms/Utils/InlineFunction.cpp | 11 +- .../Inline/update_invoke_value_profile.ll | 185 ++ 4 files changed, 209 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/Inline/update_invoke_value_profile.ll diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index bc357074e5cb21..1146b3fa3ae244 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -4360,6 +4360,9 @@ class InvokeInst : public CallBase { unsigned getNumSuccessors() const { return 2; } + /// Updates profile metadata by scaling it by \p S / \p T. + void updateProfWeight(uint64_t S, uint64_t T); + // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Instruction *I) { return (I->getOpcode() == Instruction::Invoke); diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index 9ae71acd523c36..920ce67f118991 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -918,6 +918,18 @@ LandingPadInst *InvokeInst::getLandingPadInst() const { return cast(getUnwindDest()->getFirstNonPHI()); } +void InvokeInst::updateProfWeight(uint64_t S, uint64_t T) { + if (T == 0) { +LLVM_DEBUG(dbgs() << "Attempting to update profile weights will result in " + "div by 0. Ignoring. 
Likely the function " + << getParent()->getParent()->getName() + << " has 0 entry count, and contains call instructions " + "with non-zero prof info."); +return; + } + scaleProfData(*this, S, T); +} + //===--===// //CallBrInst Implementation //===--===// diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index f68fdb26f28173..75b0d0669e9228 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -1909,10 +1909,14 @@ void llvm::updateProfileCallee( // During inlining ? if (VMap) { uint64_t CloneEntryCount = PriorEntryCount - NewEntryCount; -for (auto Entry : *VMap) +for (auto Entry : *VMap) { if (isa(Entry.first)) if (auto *CI = dyn_cast_or_null(Entry.second)) CI->updateProfWeight(CloneEntryCount, PriorEntryCount); + if (isa(Entry.first)) +if (auto *II = dyn_cast_or_null(Entry.second)) + II->updateProfWeight(CloneEntryCount, PriorEntryCount); +} } if (EntryDelta) { @@ -1921,9 +1925,12 @@ void llvm::updateProfileCallee( for (BasicBlock &BB : *Callee) // No need to update the callsite if it is pruned during inlining. 
if (!VMap || VMap->count(&BB)) -for (Instruction &I : BB) +for (Instruction &I : BB) { if (CallInst *CI = dyn_cast(&I)) CI->updateProfWeight(NewEntryCount, PriorEntryCount); + if (InvokeInst *II = dyn_cast(&I)) +II->updateProfWeight(NewEntryCount, PriorEntryCount); +} } } diff --git a/llvm/test/Transforms/Inline/update_invoke_value_profile.ll b/llvm/test/Transforms/Inline/update_invoke_value_profile.ll new file mode 100644 index 00..ac5597a41fce61 --- /dev/null +++ b/llvm/test/Transforms/Inline/update_invoke_value_profile.ll @@ -0,0 +1,185 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 + +; RUN: opt < %s -passes='require,cgscc(inline)' -inline-threshold=1000 -S | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%class.Error = type { i32 } +@_ZTI5Error = external constant { ptr, ptr } + +define i32 @callee(ptr %b) personality ptr @__gxx_personality_v0 !prof !17 { +; CHECK-LABEL: define i32 @callee( +; CHECK-SAME: ptr [[B:%.*]]) personality ptr @__gxx_personality_v0 !prof [[PROF0:![0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT:[[E:%.*]] = alloca [[CLASS_ERROR:%.*]], align 8 +; CHECK-NEXT:[[VTABLE:%.*]] = load ptr, ptr [[B]], align 8 +; CHECK-NEXT:[[TMP0:%.*]] = load ptr, ptr [[VTABLE]], align 8 +; CHECK-NEXT:[[CALL:%.*]] = invoke i32 [[TMP0]](ptr [[B]]) +; CHECK-NEXT:to label [[TRY_CONT:%.*]] unwind label [[LPAD:%.*]], !prof [[PROF1:![0-9]+]] +; CHECK: lpad: +; CHECK-NEXT:[[TM
[llvm-branch-commits] [clang] release/18.x [X86_64] fix SSE type error in vaarg (PR #86698)
efriedma-quic wrote: Is there some reason you think we should take this specific patch, out of all the x86 ABI fixes going in recently? It isn't a regression, as far as I know. https://github.com/llvm/llvm-project/pull/86698 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Set EntryDiscriminator in YAML profile for indirect calls (PR #82128)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/82128 >From f46d68517b6d0804f56a087c86c20c9bc3a08a0f Mon Sep 17 00:00:00 2001 From: Amir Ayupov Date: Tue, 26 Mar 2024 14:23:09 -0700 Subject: [PATCH] Update test Created using spr 1.3.4 --- bolt/test/X86/yaml-indirect-call-discriminator.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bolt/test/X86/yaml-indirect-call-discriminator.s b/bolt/test/X86/yaml-indirect-call-discriminator.s index 172eba6dc31c07..c7678b3811834d 100644 --- a/bolt/test/X86/yaml-indirect-call-discriminator.s +++ b/bolt/test/X86/yaml-indirect-call-discriminator.s @@ -44,7 +44,7 @@ main: testq %rax, %rax jne Lindcall Lcall: - callsecondary_entry + callfunc # FDATA: 1 main #Lcall# 1 func 0 1 1 Lindcall: callq *%rax ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT] Set EntryDiscriminator in YAML profile for indirect calls (PR #82128)
https://github.com/aaupov edited https://github.com/llvm/llvm-project/pull/82128 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT] Set EntryDiscriminator in YAML profile for indirect calls (PR #82128)
https://github.com/aaupov edited https://github.com/llvm/llvm-project/pull/82128 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release 18.x [X86] Fix miscompile in combineShiftRightArithmetic (PR #86728)
https://github.com/AtariDreams edited https://github.com/llvm/llvm-project/pull/86728 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release 18.x [X86] Fix miscompile in combineShiftRightArithmetic (PR #86728)
https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/86728 >From 0cb47c1dfda89f97725d5855429eb037be7f90ef Mon Sep 17 00:00:00 2001 From: Rose Date: Tue, 26 Mar 2024 16:34:09 -0400 Subject: [PATCH 1/2] [X86] Pre-commit tests (NFC) --- llvm/test/CodeGen/X86/sar_fold.ll | 41 +++ 1 file changed, 41 insertions(+) diff --git a/llvm/test/CodeGen/X86/sar_fold.ll b/llvm/test/CodeGen/X86/sar_fold.ll index 21655e19440afe..22ae8e8abd3eca 100644 --- a/llvm/test/CodeGen/X86/sar_fold.ll +++ b/llvm/test/CodeGen/X86/sar_fold.ll @@ -44,3 +44,44 @@ define i32 @shl24sar25(i32 %a) #0 { %2 = ashr exact i32 %1, 25 ret i32 %2 } + +define void @shl144sar48(ptr %p) #0 { +; CHECK-LABEL: shl144sar48: +; CHECK: # %bb.0: +; CHECK-NEXT:movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT:movswl (%eax), %ecx +; CHECK-NEXT:movl %ecx, %edx +; CHECK-NEXT:sarl $31, %edx +; CHECK-NEXT:shldl $2, %ecx, %edx +; CHECK-NEXT:shll $2, %ecx +; CHECK-NEXT:movl %ecx, 12(%eax) +; CHECK-NEXT:movl %edx, 16(%eax) +; CHECK-NEXT:movl $0, 8(%eax) +; CHECK-NEXT:movl $0, 4(%eax) +; CHECK-NEXT:movl $0, (%eax) +; CHECK-NEXT:retl + %a = load i160, ptr %p + %1 = shl i160 %a, 144 + %2 = ashr exact i160 %1, 46 + store i160 %2, ptr %p + ret void +} + +define void @shl144sar2(ptr %p) #0 { +; CHECK-LABEL: shl144sar2: +; CHECK: # %bb.0: +; CHECK-NEXT:movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT:movswl (%eax), %ecx +; CHECK-NEXT:sarl $31, %ecx +; CHECK-NEXT:movl %ecx, 16(%eax) +; CHECK-NEXT:movl %ecx, 8(%eax) +; CHECK-NEXT:movl %ecx, 12(%eax) +; CHECK-NEXT:movl %ecx, 4(%eax) +; CHECK-NEXT:movl %ecx, (%eax) +; CHECK-NEXT:retl + %a = load i160, ptr %p + %1 = shl i160 %a, 144 + %2 = ashr exact i160 %1, 2 + store i160 %2, ptr %p + ret void +} >From b03a8f910c4e162877ac75954a38a4ec169fff67 Mon Sep 17 00:00:00 2001 From: Rose Date: Tue, 26 Mar 2024 16:38:46 -0400 Subject: [PATCH 2/2] [X86] Fix miscompile in combineShiftRightArithmetic When folding (ashr (shl, x, c1), c2) we need to treat c1 and c2 as unsigned to find out 
if the combined shift should be a left or right shift. Also do an early out during pre-legalization in case c1 and c2 has different types, as that otherwise complicated the comparison of c1 and c2 a bit. (cherry picked from commit 3e6e54eb795ce7a1ccd47df8c22fc08125a6) --- llvm/lib/Target/X86/X86ISelLowering.cpp | 29 ++--- llvm/test/CodeGen/X86/sar_fold.ll | 10 - 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 9e64726fb6fff7..71fc6b5047eaa9 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -47035,10 +47035,13 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG, if (SDValue V = combineShiftToPMULH(N, DAG, Subtarget)) return V; - // fold (ashr (shl, a, [56,48,32,24,16]), SarConst) - // into (shl, (sext (a), [56,48,32,24,16] - SarConst)) or - // into (lshr, (sext (a), SarConst - [56,48,32,24,16])) - // depending on sign of (SarConst - [56,48,32,24,16]) + // fold (SRA (SHL X, ShlConst), SraConst) + // into (SHL (sext_in_reg X), ShlConst - SraConst) + // or (sext_in_reg X) + // or (SRA (sext_in_reg X), SraConst - ShlConst) + // depending on relation between SraConst and ShlConst. + // We only do this if (Size - ShlConst) is equal to 8, 16 or 32. That allows + // us to do the sext_in_reg from corresponding bit. // sexts in X86 are MOVs. The MOVs have the same code size // as above SHIFTs (only SHIFT on 1 has lower code size). 
@@ -47054,29 +47057,29 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG, SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); APInt ShlConst = N01->getAsAPIntVal(); - APInt SarConst = N1->getAsAPIntVal(); + APInt SraConst = N1->getAsAPIntVal(); EVT CVT = N1.getValueType(); - if (SarConst.isNegative()) + if (CVT != N01.getValueType()) +return SDValue(); + if (SraConst.isNegative()) return SDValue(); for (MVT SVT : { MVT::i8, MVT::i16, MVT::i32 }) { unsigned ShiftSize = SVT.getSizeInBits(); -// skipping types without corresponding sext/zext and -// ShlConst that is not one of [56,48,32,24,16] +// Only deal with (Size - ShlConst) being equal to 8, 16 or 32. if (ShiftSize >= Size || ShlConst != Size - ShiftSize) continue; SDLoc DL(N); SDValue NN = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N00, DAG.getValueType(SVT)); -SarConst = SarConst - (Size - ShiftSize); -if (SarConst == 0) +if (SraConst.eq(ShlConst)) return NN; -if (SarConst.isNegative()) +if (SraConst.ult(ShlConst)) return DAG.getNode(ISD::SHL, DL, VT, NN, - DAG.getConstant(-SarConst, DL, CVT)); +
[llvm-branch-commits] [llvm] release 18.x [X86] Fix miscompile in combineShiftRightArithmetic (PR #86728)
https://github.com/AtariDreams edited https://github.com/llvm/llvm-project/pull/86728 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release 18.x [X86] Fix miscompile in combineShiftRightArithmetic (#86597) (PR #86728)
https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/86728 >From 0cb47c1dfda89f97725d5855429eb037be7f90ef Mon Sep 17 00:00:00 2001 From: Rose Date: Tue, 26 Mar 2024 16:34:09 -0400 Subject: [PATCH 1/2] [X86] Pre-commit tests (NFC) --- llvm/test/CodeGen/X86/sar_fold.ll | 41 +++ 1 file changed, 41 insertions(+) diff --git a/llvm/test/CodeGen/X86/sar_fold.ll b/llvm/test/CodeGen/X86/sar_fold.ll index 21655e19440afe..22ae8e8abd3eca 100644 --- a/llvm/test/CodeGen/X86/sar_fold.ll +++ b/llvm/test/CodeGen/X86/sar_fold.ll @@ -44,3 +44,44 @@ define i32 @shl24sar25(i32 %a) #0 { %2 = ashr exact i32 %1, 25 ret i32 %2 } + +define void @shl144sar48(ptr %p) #0 { +; CHECK-LABEL: shl144sar48: +; CHECK: # %bb.0: +; CHECK-NEXT:movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT:movswl (%eax), %ecx +; CHECK-NEXT:movl %ecx, %edx +; CHECK-NEXT:sarl $31, %edx +; CHECK-NEXT:shldl $2, %ecx, %edx +; CHECK-NEXT:shll $2, %ecx +; CHECK-NEXT:movl %ecx, 12(%eax) +; CHECK-NEXT:movl %edx, 16(%eax) +; CHECK-NEXT:movl $0, 8(%eax) +; CHECK-NEXT:movl $0, 4(%eax) +; CHECK-NEXT:movl $0, (%eax) +; CHECK-NEXT:retl + %a = load i160, ptr %p + %1 = shl i160 %a, 144 + %2 = ashr exact i160 %1, 46 + store i160 %2, ptr %p + ret void +} + +define void @shl144sar2(ptr %p) #0 { +; CHECK-LABEL: shl144sar2: +; CHECK: # %bb.0: +; CHECK-NEXT:movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT:movswl (%eax), %ecx +; CHECK-NEXT:sarl $31, %ecx +; CHECK-NEXT:movl %ecx, 16(%eax) +; CHECK-NEXT:movl %ecx, 8(%eax) +; CHECK-NEXT:movl %ecx, 12(%eax) +; CHECK-NEXT:movl %ecx, 4(%eax) +; CHECK-NEXT:movl %ecx, (%eax) +; CHECK-NEXT:retl + %a = load i160, ptr %p + %1 = shl i160 %a, 144 + %2 = ashr exact i160 %1, 2 + store i160 %2, ptr %p + ret void +} >From d9f92302b0bf51450f3acfad1ea5002a3be48f24 Mon Sep 17 00:00:00 2001 From: Rose Date: Tue, 26 Mar 2024 16:38:46 -0400 Subject: [PATCH 2/2] [X86] Fix miscompile in combineShiftRightArithmetic (#86597) When folding (ashr (shl, x, c1), c2) we need to treat c1 and c2 as unsigned to 
find out if the combined shift should be a left or right shift. Also do an early out during pre-legalization in case c1 and c2 has different types, as that otherwise complicated the comparison of c1 and c2 a bit. (cherry picked from commit 3e6e54eb795ce7a1ccd47df8c22fc08125a6) --- llvm/lib/Target/X86/X86ISelLowering.cpp | 29 ++--- llvm/test/CodeGen/X86/sar_fold.ll | 10 - 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 9e64726fb6fff7..71fc6b5047eaa9 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -47035,10 +47035,13 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG, if (SDValue V = combineShiftToPMULH(N, DAG, Subtarget)) return V; - // fold (ashr (shl, a, [56,48,32,24,16]), SarConst) - // into (shl, (sext (a), [56,48,32,24,16] - SarConst)) or - // into (lshr, (sext (a), SarConst - [56,48,32,24,16])) - // depending on sign of (SarConst - [56,48,32,24,16]) + // fold (SRA (SHL X, ShlConst), SraConst) + // into (SHL (sext_in_reg X), ShlConst - SraConst) + // or (sext_in_reg X) + // or (SRA (sext_in_reg X), SraConst - ShlConst) + // depending on relation between SraConst and ShlConst. + // We only do this if (Size - ShlConst) is equal to 8, 16 or 32. That allows + // us to do the sext_in_reg from corresponding bit. // sexts in X86 are MOVs. The MOVs have the same code size // as above SHIFTs (only SHIFT on 1 has lower code size). 
@@ -47054,29 +47057,29 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG, SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); APInt ShlConst = N01->getAsAPIntVal(); - APInt SarConst = N1->getAsAPIntVal(); + APInt SraConst = N1->getAsAPIntVal(); EVT CVT = N1.getValueType(); - if (SarConst.isNegative()) + if (CVT != N01.getValueType()) +return SDValue(); + if (SraConst.isNegative()) return SDValue(); for (MVT SVT : { MVT::i8, MVT::i16, MVT::i32 }) { unsigned ShiftSize = SVT.getSizeInBits(); -// skipping types without corresponding sext/zext and -// ShlConst that is not one of [56,48,32,24,16] +// Only deal with (Size - ShlConst) being equal to 8, 16 or 32. if (ShiftSize >= Size || ShlConst != Size - ShiftSize) continue; SDLoc DL(N); SDValue NN = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N00, DAG.getValueType(SVT)); -SarConst = SarConst - (Size - ShiftSize); -if (SarConst == 0) +if (SraConst.eq(ShlConst)) return NN; -if (SarConst.isNegative()) +if (SraConst.ult(ShlConst)) return DAG.getNode(ISD::SHL, DL, VT, NN, - DAG.getConstant(-SarConst, DL, CV
[llvm-branch-commits] [llvm] release 18.x [X86] Fix miscompile in combineShiftRightArithmetic (#86597) (PR #86728)
=?utf-8?q?Björn?= Pettersson Message-ID: In-Reply-To: https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/86728 >From bd0c822c3bf356b4dc8ddc0a7449e199c44ce840 Mon Sep 17 00:00:00 2001 From: Rose Date: Tue, 26 Mar 2024 16:34:09 -0400 Subject: [PATCH 1/2] [X86] Pre-commit tests (NFC) --- llvm/test/CodeGen/X86/sar_fold.ll | 41 +++ 1 file changed, 41 insertions(+) diff --git a/llvm/test/CodeGen/X86/sar_fold.ll b/llvm/test/CodeGen/X86/sar_fold.ll index 21655e19440afe..0f1396954b03a1 100644 --- a/llvm/test/CodeGen/X86/sar_fold.ll +++ b/llvm/test/CodeGen/X86/sar_fold.ll @@ -44,3 +44,44 @@ define i32 @shl24sar25(i32 %a) #0 { %2 = ashr exact i32 %1, 25 ret i32 %2 } + +define void @shl144sar48(ptr %p) #0 { +; CHECK-LABEL: shl144sar48: +; CHECK: # %bb.0: +; CHECK-NEXT:movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT:movswl (%eax), %ecx +; CHECK-NEXT:movl %ecx, %edx +; CHECK-NEXT:sarl $31, %edx +; CHECK-NEXT:shldl $2, %ecx, %edx +; CHECK-NEXT:shll $2, %ecx +; CHECK-NEXT:movl %ecx, 12(%eax) +; CHECK-NEXT:movl %edx, 16(%eax) +; CHECK-NEXT:movl $0, 8(%eax) +; CHECK-NEXT:movl $0, 4(%eax) +; CHECK-NEXT:movl $0, (%eax) +; CHECK-NEXT:retl + %a = load i160, ptr %p + %1 = shl i160 %a, 144 + %2 = ashr exact i160 %1, 46 + store i160 %2, ptr %p + ret void +} + +define void @shl144sar2(ptr %p) #0 { +; CHECK-LABEL: shl144sar2: +; CHECK: # %bb.0: +; CHECK-NEXT:movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT:movswl (%eax), %ecx +; CHECK-NEXT:shll $14, %ecx +; CHECK-NEXT:movl %ecx, 16(%eax) +; CHECK-NEXT:movl $0, 8(%eax) +; CHECK-NEXT:movl $0, 12(%eax) +; CHECK-NEXT:movl $0, 4(%eax) +; CHECK-NEXT:movl $0, (%eax) +; CHECK-NEXT:retl + %a = load i160, ptr %p + %1 = shl i160 %a, 144 + %2 = ashr exact i160 %1, 2 + store i160 %2, ptr %p + ret void +} >From de81ccd29087367173843fb09091365099ea93de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Pettersson?= Date: Tue, 26 Mar 2024 20:53:34 +0100 Subject: [PATCH 2/2] [X86] Fix miscompile in combineShiftRightArithmetic (#86597) 
When folding (ashr (shl, x, c1), c2) we need to treat c1 and c2 as unsigned to find out if the combined shift should be a left or right shift. Also do an early out during pre-legalization in case c1 and c2 has differet types, as that otherwise complicated the comparison of c1 and c2 a bit. (cherry picked from commit 3e6e54eb795ce7a1ccd47df8c22fc08125a6) --- llvm/lib/Target/X86/X86ISelLowering.cpp | 29 ++--- llvm/test/CodeGen/X86/sar_fold.ll | 10 - 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 9e64726fb6fff7..71fc6b5047eaa9 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -47035,10 +47035,13 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG, if (SDValue V = combineShiftToPMULH(N, DAG, Subtarget)) return V; - // fold (ashr (shl, a, [56,48,32,24,16]), SarConst) - // into (shl, (sext (a), [56,48,32,24,16] - SarConst)) or - // into (lshr, (sext (a), SarConst - [56,48,32,24,16])) - // depending on sign of (SarConst - [56,48,32,24,16]) + // fold (SRA (SHL X, ShlConst), SraConst) + // into (SHL (sext_in_reg X), ShlConst - SraConst) + // or (sext_in_reg X) + // or (SRA (sext_in_reg X), SraConst - ShlConst) + // depending on relation between SraConst and ShlConst. + // We only do this if (Size - ShlConst) is equal to 8, 16 or 32. That allows + // us to do the sext_in_reg from corresponding bit. // sexts in X86 are MOVs. The MOVs have the same code size // as above SHIFTs (only SHIFT on 1 has lower code size). 
@@ -47054,29 +47057,29 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG, SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); APInt ShlConst = N01->getAsAPIntVal(); - APInt SarConst = N1->getAsAPIntVal(); + APInt SraConst = N1->getAsAPIntVal(); EVT CVT = N1.getValueType(); - if (SarConst.isNegative()) + if (CVT != N01.getValueType()) +return SDValue(); + if (SraConst.isNegative()) return SDValue(); for (MVT SVT : { MVT::i8, MVT::i16, MVT::i32 }) { unsigned ShiftSize = SVT.getSizeInBits(); -// skipping types without corresponding sext/zext and -// ShlConst that is not one of [56,48,32,24,16] +// Only deal with (Size - ShlConst) being equal to 8, 16 or 32. if (ShiftSize >= Size || ShlConst != Size - ShiftSize) continue; SDLoc DL(N); SDValue NN = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N00, DAG.getValueType(SVT)); -SarConst = SarConst - (Size - ShiftSize); -if (SarConst == 0) +if (SraConst.eq(ShlConst)) return NN; -if (SarConst.isNegative()) +if (SraConst.ult(ShlConst)) return DAG.getNode(IS
[llvm-branch-commits] [llvm] release/18.x [SLP]Fix a crash if the argument of call was affected by minbitwidth analysis (PR #86731)
https://github.com/alexey-bataev approved this pull request. LG https://github.com/llvm/llvm-project/pull/86731 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/18.x [SLP]Fix a crash if the argument of call was affected by minbitwidth analysis (PR #86731)
llvmbot wrote: @llvm/pr-subscribers-llvm-transforms Author: AtariDreams (AtariDreams) Changes Need to support proper type conversion for function arguments to avoid compiler crash. --- Full diff: https://github.com/llvm/llvm-project/pull/86731.diff 2 Files Affected: - (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+20-1) - (added) llvm/test/Transforms/SLPVectorizer/X86/call-arg-reduced-by-minbitwidth.ll (+82) ``diff diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 0a9e2c7f49f55f..1fbd69e38eaeec 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -11653,12 +11653,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, -1)) TysForDecl.push_back( FixedVectorType::get(CI->getType(), E->Scalars.size())); + auto *CEI = cast(VL0); for (unsigned I : seq(0, CI->arg_size())) { ValueList OpVL; // Some intrinsics have scalar arguments. This argument should not be // vectorized. 
if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(ID, I)) { - CallInst *CEI = cast(VL0); ScalarArg = CEI->getArgOperand(I); OpVecs.push_back(CEI->getArgOperand(I)); if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) @@ -11671,6 +11671,25 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); return E->VectorizedValue; } +auto GetOperandSignedness = [&](unsigned Idx) { + const TreeEntry *OpE = getOperandEntry(E, Idx); + bool IsSigned = false; + auto It = MinBWs.find(OpE); + if (It != MinBWs.end()) +IsSigned = It->second.second; + else +IsSigned = any_of(OpE->Scalars, [&](Value *R) { + return !isKnownNonNegative(R, SimplifyQuery(*DL)); +}); + return IsSigned; +}; +ScalarArg = CEI->getArgOperand(I); +if (cast(OpVec->getType())->getElementType() != +ScalarArg->getType()) { + auto *CastTy = FixedVectorType::get(ScalarArg->getType(), + VecTy->getNumElements()); + OpVec = Builder.CreateIntCast(OpVec, CastTy, GetOperandSignedness(I)); +} LLVM_DEBUG(dbgs() << "SLP: OpVec[" << I << "]: " << *OpVec << "\n"); OpVecs.push_back(OpVec); if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/call-arg-reduced-by-minbitwidth.ll b/llvm/test/Transforms/SLPVectorizer/X86/call-arg-reduced-by-minbitwidth.ll new file mode 100644 index 00..27c9655f94d3c5 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/call-arg-reduced-by-minbitwidth.ll @@ -0,0 +1,82 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-pc-windows-msvc19.34.0 < %s | FileCheck %s + +define void @test(ptr %0, i8 %1, i1 %cmp12.i) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[CMP12_I:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT:[[TMP2:%.*]] = insertelement <8 x i1> poison, i1 [[CMP12_I]], i32 0 +; CHECK-NEXT:[[TMP3:%.*]] = 
shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <8 x i32> zeroinitializer +; CHECK-NEXT:[[TMP4:%.*]] = insertelement <8 x i8> poison, i8 [[TMP1]], i32 0 +; CHECK-NEXT:[[TMP5:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> poison, <8 x i32> zeroinitializer +; CHECK-NEXT:br label [[PRE:%.*]] +; CHECK: pre: +; CHECK-NEXT:[[TMP6:%.*]] = zext <8 x i8> [[TMP5]] to <8 x i32> +; CHECK-NEXT:[[TMP7:%.*]] = call <8 x i32> @llvm.umax.v8i32(<8 x i32> [[TMP6]], <8 x i32> ) +; CHECK-NEXT:[[TMP8:%.*]] = trunc <8 x i32> [[TMP7]] to <8 x i8> +; CHECK-NEXT:[[TMP9:%.*]] = add <8 x i8> [[TMP8]], +; CHECK-NEXT:[[TMP10:%.*]] = select <8 x i1> [[TMP3]], <8 x i8> [[TMP9]], <8 x i8> [[TMP5]] +; CHECK-NEXT:store <8 x i8> [[TMP10]], ptr [[TMP0]], align 1 +; CHECK-NEXT:br label [[PRE]] +; +entry: + %idx11 = getelementptr i8, ptr %0, i64 1 + %idx22 = getelementptr i8, ptr %0, i64 2 + %idx33 = getelementptr i8, ptr %0, i64 3 + %idx44 = getelementptr i8, ptr %0, i64 4 + %idx55 = getelementptr i8, ptr %0, i64 5 + %idx66 = getelementptr i8, ptr %0, i64 6 + %idx77 = getelementptr i8, ptr %0, i64 7 + br label %pre + +pre: + %conv.i = zext i8 %1 to i32 + %2 = tail call i32 @llvm.umax.i32(i32 %conv.i, i32 1) + %.sroa.speculated.i = add i32 %2, 1 + %intensity.0.i = select i1 %cmp12.i, i32 %.sroa.speculated.i, i32 %conv.i + %conv14.i = trunc i32 %intensity.0.i to i8 + store i8 %conv14.i, ptr
[llvm-branch-commits] [llvm] release/18.x [SLP]Fix a crash if the argument of call was affected by minbitwidth analysis (PR #86731)
https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/86731 >From f4a886e10ccb56edc8d002492cc9d208362a Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 21 Mar 2024 17:05:50 -0700 Subject: [PATCH] [SLP]Fix a crash if the argument of call was affected by minbitwidth analysis. Need to support proper type conversion for function arguments to avoid compiler crash. --- .../Transforms/Vectorize/SLPVectorizer.cpp| 21 - .../X86/call-arg-reduced-by-minbitwidth.ll| 82 +++ 2 files changed, 102 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/call-arg-reduced-by-minbitwidth.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 0a9e2c7f49f55f..1fbd69e38eaeec 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -11653,12 +11653,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, -1)) TysForDecl.push_back( FixedVectorType::get(CI->getType(), E->Scalars.size())); + auto *CEI = cast(VL0); for (unsigned I : seq(0, CI->arg_size())) { ValueList OpVL; // Some intrinsics have scalar arguments. This argument should not be // vectorized. 
if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(ID, I)) { - CallInst *CEI = cast(VL0); ScalarArg = CEI->getArgOperand(I); OpVecs.push_back(CEI->getArgOperand(I)); if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) @@ -11671,6 +11671,25 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); return E->VectorizedValue; } +auto GetOperandSignedness = [&](unsigned Idx) { + const TreeEntry *OpE = getOperandEntry(E, Idx); + bool IsSigned = false; + auto It = MinBWs.find(OpE); + if (It != MinBWs.end()) +IsSigned = It->second.second; + else +IsSigned = any_of(OpE->Scalars, [&](Value *R) { + return !isKnownNonNegative(R, SimplifyQuery(*DL)); +}); + return IsSigned; +}; +ScalarArg = CEI->getArgOperand(I); +if (cast(OpVec->getType())->getElementType() != +ScalarArg->getType()) { + auto *CastTy = FixedVectorType::get(ScalarArg->getType(), + VecTy->getNumElements()); + OpVec = Builder.CreateIntCast(OpVec, CastTy, GetOperandSignedness(I)); +} LLVM_DEBUG(dbgs() << "SLP: OpVec[" << I << "]: " << *OpVec << "\n"); OpVecs.push_back(OpVec); if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/call-arg-reduced-by-minbitwidth.ll b/llvm/test/Transforms/SLPVectorizer/X86/call-arg-reduced-by-minbitwidth.ll new file mode 100644 index 00..27c9655f94d3c5 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/call-arg-reduced-by-minbitwidth.ll @@ -0,0 +1,82 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-pc-windows-msvc19.34.0 < %s | FileCheck %s + +define void @test(ptr %0, i8 %1, i1 %cmp12.i) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[CMP12_I:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT:[[TMP2:%.*]] = insertelement <8 x i1> poison, i1 [[CMP12_I]], i32 0 +; CHECK-NEXT:[[TMP3:%.*]] = 
shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <8 x i32> zeroinitializer +; CHECK-NEXT:[[TMP4:%.*]] = insertelement <8 x i8> poison, i8 [[TMP1]], i32 0 +; CHECK-NEXT:[[TMP5:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> poison, <8 x i32> zeroinitializer +; CHECK-NEXT:br label [[PRE:%.*]] +; CHECK: pre: +; CHECK-NEXT:[[TMP6:%.*]] = zext <8 x i8> [[TMP5]] to <8 x i32> +; CHECK-NEXT:[[TMP7:%.*]] = call <8 x i32> @llvm.umax.v8i32(<8 x i32> [[TMP6]], <8 x i32> ) +; CHECK-NEXT:[[TMP8:%.*]] = trunc <8 x i32> [[TMP7]] to <8 x i8> +; CHECK-NEXT:[[TMP9:%.*]] = add <8 x i8> [[TMP8]], +; CHECK-NEXT:[[TMP10:%.*]] = select <8 x i1> [[TMP3]], <8 x i8> [[TMP9]], <8 x i8> [[TMP5]] +; CHECK-NEXT:store <8 x i8> [[TMP10]], ptr [[TMP0]], align 1 +; CHECK-NEXT:br label [[PRE]] +; +entry: + %idx11 = getelementptr i8, ptr %0, i64 1 + %idx22 = getelementptr i8, ptr %0, i64 2 + %idx33 = getelementptr i8, ptr %0, i64 3 + %idx44 = getelementptr i8, ptr %0, i64 4 + %idx55 = getelementptr i8, ptr %0, i64 5 + %idx66 = getelementptr i8, ptr %0, i64 6 + %idx77 = getelementptr i8, ptr %0, i64 7 + br label %pre + +pre: + %conv.i = zext i8 %1 to i32 + %2 = tail call i
[llvm-branch-commits] [llvm] release/18.x [SLP]Fix a crash if the argument of call was affected by minbitwidth analysis (PR #86731)
https://github.com/AtariDreams created https://github.com/llvm/llvm-project/pull/86731 Need to support proper type conversion for function arguments to avoid compiler crash. >From a169761a39f409e0fe13cea6fbbf90a831c1f577 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 21 Mar 2024 17:05:50 -0700 Subject: [PATCH] [SLP]Fix a crash if the argument of call was affected by minbitwidt analysis. Need to support proper type conversion for function arguments to avoid compiler crash. --- .../Transforms/Vectorize/SLPVectorizer.cpp| 21 - .../X86/call-arg-reduced-by-minbitwidth.ll| 82 +++ 2 files changed, 102 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/call-arg-reduced-by-minbitwidth.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 0a9e2c7f49f55f..1fbd69e38eaeec 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -11653,12 +11653,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, -1)) TysForDecl.push_back( FixedVectorType::get(CI->getType(), E->Scalars.size())); + auto *CEI = cast(VL0); for (unsigned I : seq(0, CI->arg_size())) { ValueList OpVL; // Some intrinsics have scalar arguments. This argument should not be // vectorized. 
if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(ID, I)) { - CallInst *CEI = cast(VL0); ScalarArg = CEI->getArgOperand(I); OpVecs.push_back(CEI->getArgOperand(I)); if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) @@ -11671,6 +11671,25 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); return E->VectorizedValue; } +auto GetOperandSignedness = [&](unsigned Idx) { + const TreeEntry *OpE = getOperandEntry(E, Idx); + bool IsSigned = false; + auto It = MinBWs.find(OpE); + if (It != MinBWs.end()) +IsSigned = It->second.second; + else +IsSigned = any_of(OpE->Scalars, [&](Value *R) { + return !isKnownNonNegative(R, SimplifyQuery(*DL)); +}); + return IsSigned; +}; +ScalarArg = CEI->getArgOperand(I); +if (cast(OpVec->getType())->getElementType() != +ScalarArg->getType()) { + auto *CastTy = FixedVectorType::get(ScalarArg->getType(), + VecTy->getNumElements()); + OpVec = Builder.CreateIntCast(OpVec, CastTy, GetOperandSignedness(I)); +} LLVM_DEBUG(dbgs() << "SLP: OpVec[" << I << "]: " << *OpVec << "\n"); OpVecs.push_back(OpVec); if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/call-arg-reduced-by-minbitwidth.ll b/llvm/test/Transforms/SLPVectorizer/X86/call-arg-reduced-by-minbitwidth.ll new file mode 100644 index 00..27c9655f94d3c5 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/call-arg-reduced-by-minbitwidth.ll @@ -0,0 +1,82 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-pc-windows-msvc19.34.0 < %s | FileCheck %s + +define void @test(ptr %0, i8 %1, i1 %cmp12.i) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[CMP12_I:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT:[[TMP2:%.*]] = insertelement <8 x i1> poison, i1 [[CMP12_I]], i32 0 +; CHECK-NEXT:[[TMP3:%.*]] = 
shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <8 x i32> zeroinitializer +; CHECK-NEXT:[[TMP4:%.*]] = insertelement <8 x i8> poison, i8 [[TMP1]], i32 0 +; CHECK-NEXT:[[TMP5:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> poison, <8 x i32> zeroinitializer +; CHECK-NEXT:br label [[PRE:%.*]] +; CHECK: pre: +; CHECK-NEXT:[[TMP6:%.*]] = zext <8 x i8> [[TMP5]] to <8 x i32> +; CHECK-NEXT:[[TMP7:%.*]] = call <8 x i32> @llvm.umax.v8i32(<8 x i32> [[TMP6]], <8 x i32> ) +; CHECK-NEXT:[[TMP8:%.*]] = trunc <8 x i32> [[TMP7]] to <8 x i8> +; CHECK-NEXT:[[TMP9:%.*]] = add <8 x i8> [[TMP8]], +; CHECK-NEXT:[[TMP10:%.*]] = select <8 x i1> [[TMP3]], <8 x i8> [[TMP9]], <8 x i8> [[TMP5]] +; CHECK-NEXT:store <8 x i8> [[TMP10]], ptr [[TMP0]], align 1 +; CHECK-NEXT:br label [[PRE]] +; +entry: + %idx11 = getelementptr i8, ptr %0, i64 1 + %idx22 = getelementptr i8, ptr %0, i64 2 + %idx33 = getelementptr i8, ptr %0, i64 3 + %idx44 = getelementptr i8, ptr %0, i64 4 + %idx55 = getelementptr i8, ptr %0, i64 5 + %idx66 = getelementptr i8, ptr %0, i64 6 + %idx77 = getelementptr i8, p
[llvm-branch-commits] [llvm] release 18.x [X86] Fix miscompile in combineShiftRightArithmetic (#86597) (PR #86728)
llvmbot wrote: @llvm/pr-subscribers-backend-x86 Author: AtariDreams (AtariDreams) Changes When folding (ashr (shl, x, c1), c2) we need to treat c1 and c2 as unsigned to find out if the combined shift should be a left or right shift. Also do an early out during pre-legalization in case c1 and c2 has differet types, as that otherwise complicated the comparison of c1 and c2 a bit. (cherry picked from commit 3e6e54eb795ce7a1ccd47df8c22fc08125a6) --- Full diff: https://github.com/llvm/llvm-project/pull/86728.diff 2 Files Affected: - (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+16-13) - (modified) llvm/test/CodeGen/X86/sar_fold.ll (+41) ``diff diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 9e64726fb6fff7..71fc6b5047eaa9 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -47035,10 +47035,13 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG, if (SDValue V = combineShiftToPMULH(N, DAG, Subtarget)) return V; - // fold (ashr (shl, a, [56,48,32,24,16]), SarConst) - // into (shl, (sext (a), [56,48,32,24,16] - SarConst)) or - // into (lshr, (sext (a), SarConst - [56,48,32,24,16])) - // depending on sign of (SarConst - [56,48,32,24,16]) + // fold (SRA (SHL X, ShlConst), SraConst) + // into (SHL (sext_in_reg X), ShlConst - SraConst) + // or (sext_in_reg X) + // or (SRA (sext_in_reg X), SraConst - ShlConst) + // depending on relation between SraConst and ShlConst. + // We only do this if (Size - ShlConst) is equal to 8, 16 or 32. That allows + // us to do the sext_in_reg from corresponding bit. // sexts in X86 are MOVs. The MOVs have the same code size // as above SHIFTs (only SHIFT on 1 has lower code size). 
@@ -47054,29 +47057,29 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG, SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); APInt ShlConst = N01->getAsAPIntVal(); - APInt SarConst = N1->getAsAPIntVal(); + APInt SraConst = N1->getAsAPIntVal(); EVT CVT = N1.getValueType(); - if (SarConst.isNegative()) + if (CVT != N01.getValueType()) +return SDValue(); + if (SraConst.isNegative()) return SDValue(); for (MVT SVT : { MVT::i8, MVT::i16, MVT::i32 }) { unsigned ShiftSize = SVT.getSizeInBits(); -// skipping types without corresponding sext/zext and -// ShlConst that is not one of [56,48,32,24,16] +// Only deal with (Size - ShlConst) being equal to 8, 16 or 32. if (ShiftSize >= Size || ShlConst != Size - ShiftSize) continue; SDLoc DL(N); SDValue NN = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N00, DAG.getValueType(SVT)); -SarConst = SarConst - (Size - ShiftSize); -if (SarConst == 0) +if (SraConst.eq(ShlConst)) return NN; -if (SarConst.isNegative()) +if (SraConst.ult(ShlConst)) return DAG.getNode(ISD::SHL, DL, VT, NN, - DAG.getConstant(-SarConst, DL, CVT)); + DAG.getConstant(ShlConst - SraConst, DL, CVT)); return DAG.getNode(ISD::SRA, DL, VT, NN, - DAG.getConstant(SarConst, DL, CVT)); + DAG.getConstant(SraConst - ShlConst, DL, CVT)); } return SDValue(); } diff --git a/llvm/test/CodeGen/X86/sar_fold.ll b/llvm/test/CodeGen/X86/sar_fold.ll index 21655e19440afe..0f1396954b03a1 100644 --- a/llvm/test/CodeGen/X86/sar_fold.ll +++ b/llvm/test/CodeGen/X86/sar_fold.ll @@ -44,3 +44,44 @@ define i32 @shl24sar25(i32 %a) #0 { %2 = ashr exact i32 %1, 25 ret i32 %2 } + +define void @shl144sar48(ptr %p) #0 { +; CHECK-LABEL: shl144sar48: +; CHECK: # %bb.0: +; CHECK-NEXT:movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT:movswl (%eax), %ecx +; CHECK-NEXT:movl %ecx, %edx +; CHECK-NEXT:sarl $31, %edx +; CHECK-NEXT:shldl $2, %ecx, %edx +; CHECK-NEXT:shll $2, %ecx +; CHECK-NEXT:movl %ecx, 12(%eax) +; CHECK-NEXT:movl %edx, 16(%eax) +; CHECK-NEXT:movl $0, 8(%eax) +; 
CHECK-NEXT:movl $0, 4(%eax) +; CHECK-NEXT:movl $0, (%eax) +; CHECK-NEXT:retl + %a = load i160, ptr %p + %1 = shl i160 %a, 144 + %2 = ashr exact i160 %1, 46 + store i160 %2, ptr %p + ret void +} + +define void @shl144sar2(ptr %p) #0 { +; CHECK-LABEL: shl144sar2: +; CHECK: # %bb.0: +; CHECK-NEXT:movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT:movswl (%eax), %ecx +; CHECK-NEXT:shll $14, %ecx +; CHECK-NEXT:movl %ecx, 16(%eax) +; CHECK-NEXT:movl $0, 8(%eax) +; CHECK-NEXT:movl $0, 12(%eax) +; CHECK-NEXT:movl $0, 4(%eax) +; CHECK-NEXT:movl $0, (%eax) +; CHECK-NEXT:retl + %a = load i160, ptr %p + %1 = shl i160 %a, 144 + %2 = ashr exact i160 %1, 2 + store i160 %2, ptr %p + ret void +} `` https://github.com/llvm/llvm-project/pull/86728 ___ llvm-branch-commits mailing list llvm-branch-commit
[llvm-branch-commits] [llvm] release 18.x [X86] Fix miscompile in combineShiftRightArithmetic (#86597) (PR #86728)
https://github.com/AtariDreams created https://github.com/llvm/llvm-project/pull/86728 When folding (ashr (shl, x, c1), c2) we need to treat c1 and c2 as unsigned to find out if the combined shift should be a left or right shift. Also do an early out during pre-legalization in case c1 and c2 has differet types, as that otherwise complicated the comparison of c1 and c2 a bit. (cherry picked from commit 3e6e54eb795ce7a1ccd47df8c22fc08125a6) >From 9b30b09c19df92572120136368d4435f9e4b77cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Pettersson?= Date: Tue, 26 Mar 2024 20:53:34 +0100 Subject: [PATCH] [X86] Fix miscompile in combineShiftRightArithmetic (#86597) When folding (ashr (shl, x, c1), c2) we need to treat c1 and c2 as unsigned to find out if the combined shift should be a left or right shift. Also do an early out during pre-legalization in case c1 and c2 has differet types, as that otherwise complicated the comparison of c1 and c2 a bit. (cherry picked from commit 3e6e54eb795ce7a1ccd47df8c22fc08125a6) --- llvm/lib/Target/X86/X86ISelLowering.cpp | 29 + llvm/test/CodeGen/X86/sar_fold.ll | 41 + 2 files changed, 57 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 9e64726fb6fff7..71fc6b5047eaa9 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -47035,10 +47035,13 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG, if (SDValue V = combineShiftToPMULH(N, DAG, Subtarget)) return V; - // fold (ashr (shl, a, [56,48,32,24,16]), SarConst) - // into (shl, (sext (a), [56,48,32,24,16] - SarConst)) or - // into (lshr, (sext (a), SarConst - [56,48,32,24,16])) - // depending on sign of (SarConst - [56,48,32,24,16]) + // fold (SRA (SHL X, ShlConst), SraConst) + // into (SHL (sext_in_reg X), ShlConst - SraConst) + // or (sext_in_reg X) + // or (SRA (sext_in_reg X), SraConst - ShlConst) + // depending on relation 
between SraConst and ShlConst. + // We only do this if (Size - ShlConst) is equal to 8, 16 or 32. That allows + // us to do the sext_in_reg from corresponding bit. // sexts in X86 are MOVs. The MOVs have the same code size // as above SHIFTs (only SHIFT on 1 has lower code size). @@ -47054,29 +47057,29 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG, SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); APInt ShlConst = N01->getAsAPIntVal(); - APInt SarConst = N1->getAsAPIntVal(); + APInt SraConst = N1->getAsAPIntVal(); EVT CVT = N1.getValueType(); - if (SarConst.isNegative()) + if (CVT != N01.getValueType()) +return SDValue(); + if (SraConst.isNegative()) return SDValue(); for (MVT SVT : { MVT::i8, MVT::i16, MVT::i32 }) { unsigned ShiftSize = SVT.getSizeInBits(); -// skipping types without corresponding sext/zext and -// ShlConst that is not one of [56,48,32,24,16] +// Only deal with (Size - ShlConst) being equal to 8, 16 or 32. if (ShiftSize >= Size || ShlConst != Size - ShiftSize) continue; SDLoc DL(N); SDValue NN = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N00, DAG.getValueType(SVT)); -SarConst = SarConst - (Size - ShiftSize); -if (SarConst == 0) +if (SraConst.eq(ShlConst)) return NN; -if (SarConst.isNegative()) +if (SraConst.ult(ShlConst)) return DAG.getNode(ISD::SHL, DL, VT, NN, - DAG.getConstant(-SarConst, DL, CVT)); + DAG.getConstant(ShlConst - SraConst, DL, CVT)); return DAG.getNode(ISD::SRA, DL, VT, NN, - DAG.getConstant(SarConst, DL, CVT)); + DAG.getConstant(SraConst - ShlConst, DL, CVT)); } return SDValue(); } diff --git a/llvm/test/CodeGen/X86/sar_fold.ll b/llvm/test/CodeGen/X86/sar_fold.ll index 21655e19440afe..0f1396954b03a1 100644 --- a/llvm/test/CodeGen/X86/sar_fold.ll +++ b/llvm/test/CodeGen/X86/sar_fold.ll @@ -44,3 +44,44 @@ define i32 @shl24sar25(i32 %a) #0 { %2 = ashr exact i32 %1, 25 ret i32 %2 } + +define void @shl144sar48(ptr %p) #0 { +; CHECK-LABEL: shl144sar48: +; CHECK: # %bb.0: +; CHECK-NEXT:movl 
{{[0-9]+}}(%esp), %eax +; CHECK-NEXT:movswl (%eax), %ecx +; CHECK-NEXT:movl %ecx, %edx +; CHECK-NEXT:sarl $31, %edx +; CHECK-NEXT:shldl $2, %ecx, %edx +; CHECK-NEXT:shll $2, %ecx +; CHECK-NEXT:movl %ecx, 12(%eax) +; CHECK-NEXT:movl %edx, 16(%eax) +; CHECK-NEXT:movl $0, 8(%eax) +; CHECK-NEXT:movl $0, 4(%eax) +; CHECK-NEXT:movl $0, (%eax) +; CHECK-NEXT:retl + %a = load i160, ptr %p + %1 = shl i160 %a, 144 + %2 = ashr exact i160 %1, 46 + store i160 %2, ptr %p + ret void +} + +define void @shl144sar2(ptr %p) #0 { +; CHECK-LABEL: shl144sar2: +; CHECK: # %bb.0: +; CHECK-NEXT:movl {{[0-9]+}}(%esp), %eax +;
[llvm-branch-commits] [BOLT] Set EntryDiscriminator in YAML profile for indirect calls (PR #82128)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/82128 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT] Set EntryDiscriminator in YAML profile for indirect calls (PR #82128)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/82128 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/18.x [SLP] Fix a crash if the argument of call was affected by minbitwidt analysis (PR #86701)
https://github.com/AtariDreams closed https://github.com/llvm/llvm-project/pull/86701 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/18.x [SLP] Fix a crash if the argument of call was affected by minbitwidt analysis (PR #86701)
https://github.com/AtariDreams reopened https://github.com/llvm/llvm-project/pull/86701 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/18.x [X86_64] fix SSE type error in vaarg (PR #86698)
https://github.com/AtariDreams updated https://github.com/llvm/llvm-project/pull/86698 >From 707fa564f1b047442d731290e6a634b8df308692 Mon Sep 17 00:00:00 2001 From: Longsheng Mou Date: Tue, 26 Mar 2024 09:19:42 +0800 Subject: [PATCH] [X86_64] fix SSE type error in vaarg. (#86377) tweak the position of the ++neededSSE when Lo is NoClass and Hi is SSE. Fix #86371. (cherry picked from commit 9c8dd5e6f6bd93deb95de9642632223f54a18a11) --- clang/lib/CodeGen/Targets/X86.cpp | 3 +- clang/test/CodeGen/X86/x86_64-vaarg.c | 72 ++ clang/test/CodeGenCXX/x86_64-vaarg.cpp | 70 + 3 files changed, 143 insertions(+), 2 deletions(-) create mode 100644 clang/test/CodeGen/X86/x86_64-vaarg.c create mode 100644 clang/test/CodeGenCXX/x86_64-vaarg.cpp diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp index 2291c991fb1107..60f986d9ef50e0 100644 --- a/clang/lib/CodeGen/Targets/X86.cpp +++ b/clang/lib/CodeGen/Targets/X86.cpp @@ -2788,12 +2788,11 @@ X86_64ABIInfo::classifyArgumentType(QualType Ty, unsigned freeIntRegs, // memory), except in situations involving unions. case X87Up: case SSE: +++neededSSE; HighPart = GetSSETypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8); if (Lo == NoClass) // Pass HighPart at offset 8 in memory. return ABIArgInfo::getDirect(HighPart, 8); - -++neededSSE; break; // AMD64-ABI 3.2.3p3: Rule 4. If the class is SSEUP, the diff --git a/clang/test/CodeGen/X86/x86_64-vaarg.c b/clang/test/CodeGen/X86/x86_64-vaarg.c new file mode 100644 index 00..19e98dee97be35 --- /dev/null +++ b/clang/test/CodeGen/X86/x86_64-vaarg.c @@ -0,0 +1,72 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s + + +typedef struct { struct {} a; } empty; + +// CHECK-LABEL: define dso_local void @empty_record_test( +// CHECK-SAME: i32 noundef [[Z:%.*]], ...) 
#[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT:[[RETVAL:%.*]] = alloca [[STRUCT_EMPTY:%.*]], align 1 +// CHECK-NEXT:[[Z_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT:[[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +// CHECK-NEXT:store i32 [[Z]], ptr [[Z_ADDR]], align 4 +// CHECK-NEXT:[[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 +// CHECK-NEXT:call void @llvm.va_start(ptr [[ARRAYDECAY]]) +// CHECK-NEXT:[[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 +// CHECK-NEXT:[[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY1]], i32 0, i32 2 +// CHECK-NEXT:[[OVERFLOW_ARG_AREA:%.*]] = load ptr, ptr [[OVERFLOW_ARG_AREA_P]], align 8 +// CHECK-NEXT:[[OVERFLOW_ARG_AREA_NEXT:%.*]] = getelementptr i8, ptr [[OVERFLOW_ARG_AREA]], i32 0 +// CHECK-NEXT:store ptr [[OVERFLOW_ARG_AREA_NEXT]], ptr [[OVERFLOW_ARG_AREA_P]], align 8 +// CHECK-NEXT:call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[RETVAL]], ptr align 1 [[OVERFLOW_ARG_AREA]], i64 0, i1 false) +// CHECK-NEXT:ret void +// +empty empty_record_test(int z, ...) { + __builtin_va_list list; + __builtin_va_start(list, z); + return __builtin_va_arg(list, empty); +} + +typedef struct { + struct{} a; + double b; +} s1; + +// CHECK-LABEL: define dso_local double @f( +// CHECK-SAME: i32 noundef [[Z:%.*]], ...) 
#[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT:[[RETVAL:%.*]] = alloca [[STRUCT_S1:%.*]], align 8 +// CHECK-NEXT:[[Z_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT:[[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +// CHECK-NEXT:store i32 [[Z]], ptr [[Z_ADDR]], align 4 +// CHECK-NEXT:[[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 +// CHECK-NEXT:call void @llvm.va_start(ptr [[ARRAYDECAY]]) +// CHECK-NEXT:[[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 +// CHECK-NEXT:[[FP_OFFSET_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY1]], i32 0, i32 1 +// CHECK-NEXT:[[FP_OFFSET:%.*]] = load i32, ptr [[FP_OFFSET_P]], align 4 +// CHECK-NEXT:[[FITS_IN_FP:%.*]] = icmp ule i32 [[FP_OFFSET]], 160 +// CHECK-NEXT:br i1 [[FITS_IN_FP]], label [[VAARG_IN_REG:%.*]], label [[VAARG_IN_MEM:%.*]] +// CHECK: vaarg.in_reg: +// CHECK-NEXT:[[TMP0:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 3 +// CHECK-NEXT:[[REG_SAVE_AREA:%.*]] = load ptr, ptr [[TMP0]], align 16 +// CHECK-NEXT:[[TMP1:%.*]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i32 [[FP_OFFSET]] +// CHECK-NEXT:[[TMP2:%.*]] = add i32 [[FP_OFFSET]], 16 +// CHECK-NEXT:store i32 [[TMP2]], ptr [[FP_OFFSET_P]], align 4 +// CHECK-NEXT:br labe
[llvm-branch-commits] [libcxx] [libc++][chrono] Adds the sys_info class. (PR #85619)
@@ -0,0 +1,1374 @@ +//===--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: no-filesystem, no-localization, no-tzdb + +// XFAIL: libcpp-has-no-incomplete-tzdb +// XFAIL: availability-tzdb-missing + +// + +// class time_zone; + +// template +// sys_info get_info(const sys_time<_Duration>& time) const; + +// This test uses the system provided database. This makes the test portable, +// but may cause failures when the database information changes. Historic data +// may change if new facts are uncovered, future data may change when regions +// change their time zone or daylight saving time. Most tests will not look in +// the future to attempt to avoid issues. All tests list the data on which they +// are based, this makes debugging easier upon failure; including to see whether +// the provided data has not been changed +// +// +// The data in the tests can be validated by using the zdump tool. For +// example +// zdump -v Asia/Hong_Kong +// show all transistions in the Hong Kong time zone. Or +// zdump -c1970,1980 -v Asia/Hong_Kong +// shows all transitions in Hong Kong between 1970 and 1980. 
+ +#include +#include +#include +#include + +#include "test_macros.h" +#include "assert_macros.h" +#include "concat_macros.h" + +/* * HELPERS * */ + +[[nodiscard]] static std::chrono::sys_seconds to_sys_seconds( +std::chrono::year year, +std::chrono::month month, +std::chrono::day day, +std::chrono::hours h = std::chrono::hours(0), +std::chrono::minutes m = std::chrono::minutes{0}, +std::chrono::seconds s = std::chrono::seconds{0}) { + std::chrono::year_month_day result{year, month, day}; + + return std::chrono::time_point_cast(static_cast(result)) + h + m + s; +} + +static void assert_equal(const std::chrono::sys_info& lhs, const std::chrono::sys_info& rhs) { + TEST_REQUIRE(lhs.begin == rhs.begin, + TEST_WRITE_CONCATENATED("\nBegin:\nExpected output ", lhs.begin, "\nActual output ", rhs.begin, '\n')); + TEST_REQUIRE(lhs.end == rhs.end, + TEST_WRITE_CONCATENATED("\nEnd:\nExpected output ", lhs.end, "\nActual output ", rhs.end, '\n')); + TEST_REQUIRE( + lhs.offset == rhs.offset, + TEST_WRITE_CONCATENATED("\nOffset:\nExpected output ", lhs.offset, "\nActual output ", rhs.offset, '\n')); + TEST_REQUIRE(lhs.save == rhs.save, + TEST_WRITE_CONCATENATED("\nSave:\nExpected output ", lhs.save, "\nActual output ", rhs.save, '\n')); + TEST_REQUIRE( + lhs.abbrev == rhs.abbrev, + TEST_WRITE_CONCATENATED("\nAbbrev:\nExpected output ", lhs.abbrev, "\nActual output ", rhs.abbrev, '\n')); +} + +static void assert_equal(std::string_view expected, const std::chrono::sys_info& value) { + // Note the output of operator<< is implementation defined, use this + // format to keep the test portable. 
+ std::string result = std::format( + "[{}, {}) {:%T} {:%Q%q} {}", + value.begin, + value.end, + std::chrono::hh_mm_ss{value.offset}, + value.save, + value.abbrev); + + TEST_REQUIRE(expected == result, + TEST_WRITE_CONCATENATED("\nExpected output ", expected, "\nActual output ", result, '\n')); +} + +static void +assert_range(std::string_view expected, const std::chrono::sys_info& begin, const std::chrono::sys_info& end) { + assert_equal(expected, begin); + assert_equal(expected, end); +} + +static void assert_cycle( +std::string_view expected_1, +const std::chrono::sys_info& begin_1, +const std::chrono::sys_info& end_1, +std::string_view expected_2, +const std::chrono::sys_info& begin_2, +const std::chrono::sys_info& end_2 + +) { + assert_range(expected_1, begin_1, end_1); + assert_range(expected_2, begin_2, end_2); +} + +/* * TESTS * */ + +static void test_gmt() { + // Simple zone always valid, no rule entries, lookup using a link. + // L Etc/GMT GMT + // Z Etc/GMT 0 - GMT + + const std::chrono::time_zone* tz = std::chrono::locate_zone("GMT"); + + assert_equal( + std::chrono::sys_info( + std::chrono::sys_seconds::min(), + std::chrono::sys_seconds::max(), + std::chrono::seconds(0), + std::chrono::minutes(0), + "GMT"), + tz->get_info(std::chrono::sys_seconds::min())); + assert_equal( + std::chrono::sys_info( + std::chrono::sys_seconds::min(), + std::chrono::sys_seconds::max(), + std::chrono::seconds(0), + std::chrono::minutes(0), + "GMT"), + tz->get_info(std::chrono::sys_seconds(std::chrono::seconds{0}))); + +
[llvm-branch-commits] [libcxx] [libc++][chrono] Adds the sys_info class. (PR #85619)
@@ -0,0 +1,53 @@ +// -*- C++ -*- +//===--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +// For information see https://libcxx.llvm.org/DesignDocs/TimeZone.html + +#ifndef _LIBCPP___CHRONO_SYS_INFO_H +#define _LIBCPP___CHRONO_SYS_INFO_H + +#include +// Enable the contents of the header only when libc++ was built with experimental features enabled. +#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_TZDB) + +# include <__chrono/duration.h> +# include <__chrono/system_clock.h> +# include <__chrono/time_point.h> +# include <__config> +# include + +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +# endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +# if _LIBCPP_STD_VER >= 20 && !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) && \ + !defined(_LIBCPP_HAS_NO_LOCALIZATION) + +namespace chrono { + +struct sys_info { ldionne wrote: Do you have a test where you actually check that `sys_info` can be created as e.g. ```c++ sys_info info{a, b, c, d, e}; assert(info.begin == a); // etc... ``` Also ```c++ sys_info info{.begin = a, etc..}; ``` This is technically just an aggregate type so we should ensure that ours is one. Actually, I would replace the test `libcxx/test/std/time/time.zone/time.zone.info/time.zone.info.sys/sys_info.members.compile.pass.cpp` by this test. https://github.com/llvm/llvm-project/pull/85619 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] [libc++][chrono] Adds the sys_info class. (PR #85619)
https://github.com/ldionne edited https://github.com/llvm/llvm-project/pull/85619 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] [libc++][chrono] Adds the sys_info class. (PR #85619)
@@ -0,0 +1,127 @@ +//===--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: no-filesystem, no-localization, no-tzdb, has-no-zdump + +// XFAIL: libcpp-has-no-incomplete-tzdb +// XFAIL: availability-tzdb-missing + +// TODO TZDB Investigate +// XFAIL: target={{armv(7|8)l-linux-gnueabihf}} + +// This test compares the output of the zdump against the output based on the +// standard library implementation. It tests all available time zones and +// validates them. The specification of how to use the IANA database is limited +// and the real database contains quite a number of "interesting" cases. + +#include +#include +#include +#include + +#include "assert_macros.h" +#include "concat_macros.h" + +// The year range to validate. The dates used in practice are expected to be +// inside the tested range. +constexpr std::chrono::year first{1800}; +constexpr std::chrono::year last{2100}; + +// A custom sys_info class that also stores the name of the time zone. +// Its formatter matches the output of zdump. +struct sys_info : public std::chrono::sys_info { + sys_info(std::string_view name_, std::chrono::sys_info info) : std::chrono::sys_info{info}, name{name_} {} + + std::string name; +}; + +template <> +struct std::formatter { + template + constexpr typename ParseContext::iterator parse(ParseContext& ctx) { +return ctx.begin(); + } + + template + typename FormatContext::iterator format(const sys_info& info, FormatContext& ctx) const { +using namespace std::literals::chrono_literals; + +// Every "sys_info" entry of zdump consists of 2 lines. 
+// - 1 for first second of the range +// - 1 for last second of the range +// For example: +// Africa/Casablanca Sun Mar 25 02:00:00 2018 UT = Sun Mar 25 03:00:00 2018 +01 isdst=1 gmtoff=3600 +// Africa/Casablanca Sun May 13 01:59:59 2018 UT = Sun May 13 02:59:59 2018 +01 isdst=1 gmtoff=3600 + +if (info.begin != std::chrono::sys_seconds::min()) + ctx.advance_to(std::format_to( + ctx.out(), + "{} {:%a %b %e %H:%M:%S %Y} UT = {:%a %b %e %H:%M:%S %Y} {} isdst={:d} gmtoff={:%Q}\n", + info.name, + info.begin, + info.begin + info.offset, + info.abbrev, + info.save != 0s, + info.offset)); + +if (info.end != std::chrono::sys_seconds::max()) + ctx.advance_to(std::format_to( + ctx.out(), + "{} {:%a %b %e %H:%M:%S %Y} UT = {:%a %b %e %H:%M:%S %Y} {} isdst={:d} gmtoff={:%Q}\n", + info.name, + info.end - 1s, + info.end - 1s + info.offset, + info.abbrev, + info.save != 0s, + info.offset)); + +return ctx.out(); + } +}; + +void process(std::ostream& stream, const std::chrono::time_zone& zone) { + using namespace std::literals::chrono_literals; + + constexpr auto begin = std::chrono::time_point_cast( + static_cast(std::chrono::year_month_day{first, std::chrono::January, 1d})); + constexpr auto end = std::chrono::time_point_cast( + static_cast(std::chrono::year_month_day{last, std::chrono::January, 1d})); + + std::chrono::sys_seconds s = begin; + do { +sys_info info{zone.name(), zone.get_info(s)}; + +if (info.end >= end) + info.end = std::chrono::sys_seconds::max(); + +stream << std::format("{}", info); +s = info.end; + } while (s != std::chrono::sys_seconds::max()); +} + +int main(int, const char**) { + const std::chrono::tzdb& tzdb = std::chrono::get_tzdb(); + std::string file = std::tmpnam(nullptr); ldionne wrote: We have a facility for obtaining temporary files and making sure they get removed properly after. Let's use it! 
https://github.com/llvm/llvm-project/pull/85619 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] [libc++][chrono] Adds the sys_info class. (PR #85619)
https://github.com/ldionne commented: We can dive into the implementation during the next round of review! https://github.com/llvm/llvm-project/pull/85619 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/18.x [SLP] Fix a crash if the argument of call was affected by minbitwidt analysis (PR #86701)
https://github.com/AtariDreams closed https://github.com/llvm/llvm-project/pull/86701 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] release/18.x: [lld/ELF][X86] Respect outSecOff when checking if GOTPCREL can be relaxed (#86334) (PR #86688)
MaskRay wrote: The patch in question fixed a regression (assert in certain cases) introduced by https://reviews.llvm.org/D157020 https://github.com/llvm/llvm-project/pull/86688 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] release/18.x: [lld/ELF][X86] Respect outSecOff when checking if GOTPCREL can be relaxed (#86334) (PR #86688)
https://github.com/MaskRay approved this pull request. https://github.com/llvm/llvm-project/pull/86688 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/18.x [SLP] Fix a crash if the argument of call was affected by minbitwidt analysis (PR #86701)
https://github.com/alexey-bataev approved this pull request. LG https://github.com/llvm/llvm-project/pull/86701 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/18.x [SLP] Fix a crash if the argument of call was affected by minbitwidt analysis (PR #86701)
llvmbot wrote: @llvm/pr-subscribers-llvm-transforms Author: AtariDreams (AtariDreams) Changes Need to support proper type conversion for function arguments to avoid compiler crash. (cherry-picked from commit 3942bd2fb56380aa050977dc6aede011e191d9b0). --- Full diff: https://github.com/llvm/llvm-project/pull/86701.diff 2 Files Affected: - (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+8-1) - (added) llvm/test/Transforms/SLPVectorizer/X86/call-arg-reduced-by-minbitwidth.ll (+82) ``diff diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 0a9e2c7f49f55f..07fdd5e97a373d 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -11653,12 +11653,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, -1)) TysForDecl.push_back( FixedVectorType::get(CI->getType(), E->Scalars.size())); + auto *CEI = cast(VL0); for (unsigned I : seq(0, CI->arg_size())) { ValueList OpVL; // Some intrinsics have scalar arguments. This argument should not be // vectorized. 
if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(ID, I)) { - CallInst *CEI = cast(VL0); ScalarArg = CEI->getArgOperand(I); OpVecs.push_back(CEI->getArgOperand(I)); if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) @@ -11671,6 +11671,13 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); return E->VectorizedValue; } +ScalarArg = CEI->getArgOperand(I); +if (cast(OpVec->getType())->getElementType() != +ScalarArg->getType()) { + auto *CastTy = FixedVectorType::get(ScalarArg->getType(), + VecTy->getNumElements()); + OpVec = Builder.CreateIntCast(OpVec, CastTy, GetOperandSignedness(I)); +} LLVM_DEBUG(dbgs() << "SLP: OpVec[" << I << "]: " << *OpVec << "\n"); OpVecs.push_back(OpVec); if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/call-arg-reduced-by-minbitwidth.ll b/llvm/test/Transforms/SLPVectorizer/X86/call-arg-reduced-by-minbitwidth.ll new file mode 100644 index 00..27c9655f94d3c5 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/call-arg-reduced-by-minbitwidth.ll @@ -0,0 +1,82 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-pc-windows-msvc19.34.0 < %s | FileCheck %s + +define void @test(ptr %0, i8 %1, i1 %cmp12.i) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[CMP12_I:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT:[[TMP2:%.*]] = insertelement <8 x i1> poison, i1 [[CMP12_I]], i32 0 +; CHECK-NEXT:[[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <8 x i32> zeroinitializer +; CHECK-NEXT:[[TMP4:%.*]] = insertelement <8 x i8> poison, i8 [[TMP1]], i32 0 +; CHECK-NEXT:[[TMP5:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> poison, <8 x i32> zeroinitializer +; CHECK-NEXT:br label [[PRE:%.*]] +; CHECK: pre: +; CHECK-NEXT:[[TMP6:%.*]] = zext <8 x 
i8> [[TMP5]] to <8 x i32> +; CHECK-NEXT:[[TMP7:%.*]] = call <8 x i32> @llvm.umax.v8i32(<8 x i32> [[TMP6]], <8 x i32> ) +; CHECK-NEXT:[[TMP8:%.*]] = trunc <8 x i32> [[TMP7]] to <8 x i8> +; CHECK-NEXT:[[TMP9:%.*]] = add <8 x i8> [[TMP8]], +; CHECK-NEXT:[[TMP10:%.*]] = select <8 x i1> [[TMP3]], <8 x i8> [[TMP9]], <8 x i8> [[TMP5]] +; CHECK-NEXT:store <8 x i8> [[TMP10]], ptr [[TMP0]], align 1 +; CHECK-NEXT:br label [[PRE]] +; +entry: + %idx11 = getelementptr i8, ptr %0, i64 1 + %idx22 = getelementptr i8, ptr %0, i64 2 + %idx33 = getelementptr i8, ptr %0, i64 3 + %idx44 = getelementptr i8, ptr %0, i64 4 + %idx55 = getelementptr i8, ptr %0, i64 5 + %idx66 = getelementptr i8, ptr %0, i64 6 + %idx77 = getelementptr i8, ptr %0, i64 7 + br label %pre + +pre: + %conv.i = zext i8 %1 to i32 + %2 = tail call i32 @llvm.umax.i32(i32 %conv.i, i32 1) + %.sroa.speculated.i = add i32 %2, 1 + %intensity.0.i = select i1 %cmp12.i, i32 %.sroa.speculated.i, i32 %conv.i + %conv14.i = trunc i32 %intensity.0.i to i8 + store i8 %conv14.i, ptr %0, align 1 + %conv.i.1 = zext i8 %1 to i32 + %3 = tail call i32 @llvm.umax.i32(i32 %conv.i.1, i32 1) + %ss1 = add i32 %3, 1 + %ii1 = select i1 %cmp12.i, i32 %ss1, i32 %conv.i.1 + %conv14.i.1 = trunc i32 %ii1 to i8 + store i8 %conv14.i.1, ptr %idx11, align 1 + %conv.i.2 = zext i8 %1 to i32 + %4 = tail call i32 @llvm.umax.i32(i32 %conv.i.2, i32 1) + %ss2 = add i32 %4, 1 + %ii2 = select
[llvm-branch-commits] [llvm] release/18.x [SLP] Fix a crash if the argument of call was affected by minbitwidt analysis (PR #86701)
https://github.com/AtariDreams edited https://github.com/llvm/llvm-project/pull/86701 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SLP]Fix a crash if the argument of call was affected by minbitwidt analysis (PR #86701)
https://github.com/AtariDreams created https://github.com/llvm/llvm-project/pull/86701 Need to support proper type conversion for function arguments to avoid compiler crash. (cherry-picked from commit 3942bd2fb56380aa050977dc6aede011e191d9b0). >From 02778184f1a05b264f4029cc405fe7f2ccfb1bb4 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 21 Mar 2024 17:05:50 -0700 Subject: [PATCH] [SLP]Fix a crash if the argument of call was affected by minbitwidt analysis. Need to support proper type conversion for function arguments to avoid compiler crash. --- .../Transforms/Vectorize/SLPVectorizer.cpp| 9 +- .../X86/call-arg-reduced-by-minbitwidth.ll| 82 +++ 2 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/call-arg-reduced-by-minbitwidth.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 0a9e2c7f49f55f..07fdd5e97a373d 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -11653,12 +11653,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, -1)) TysForDecl.push_back( FixedVectorType::get(CI->getType(), E->Scalars.size())); + auto *CEI = cast(VL0); for (unsigned I : seq(0, CI->arg_size())) { ValueList OpVL; // Some intrinsics have scalar arguments. This argument should not be // vectorized. 
if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(ID, I)) { - CallInst *CEI = cast(VL0); ScalarArg = CEI->getArgOperand(I); OpVecs.push_back(CEI->getArgOperand(I)); if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) @@ -11671,6 +11671,13 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); return E->VectorizedValue; } +ScalarArg = CEI->getArgOperand(I); +if (cast(OpVec->getType())->getElementType() != +ScalarArg->getType()) { + auto *CastTy = FixedVectorType::get(ScalarArg->getType(), + VecTy->getNumElements()); + OpVec = Builder.CreateIntCast(OpVec, CastTy, GetOperandSignedness(I)); +} LLVM_DEBUG(dbgs() << "SLP: OpVec[" << I << "]: " << *OpVec << "\n"); OpVecs.push_back(OpVec); if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/call-arg-reduced-by-minbitwidth.ll b/llvm/test/Transforms/SLPVectorizer/X86/call-arg-reduced-by-minbitwidth.ll new file mode 100644 index 00..27c9655f94d3c5 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/call-arg-reduced-by-minbitwidth.ll @@ -0,0 +1,82 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-pc-windows-msvc19.34.0 < %s | FileCheck %s + +define void @test(ptr %0, i8 %1, i1 %cmp12.i) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[CMP12_I:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT:[[TMP2:%.*]] = insertelement <8 x i1> poison, i1 [[CMP12_I]], i32 0 +; CHECK-NEXT:[[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <8 x i32> zeroinitializer +; CHECK-NEXT:[[TMP4:%.*]] = insertelement <8 x i8> poison, i8 [[TMP1]], i32 0 +; CHECK-NEXT:[[TMP5:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> poison, <8 x i32> zeroinitializer +; CHECK-NEXT:br label [[PRE:%.*]] +; CHECK: pre: +; CHECK-NEXT:[[TMP6:%.*]] = zext <8 x 
i8> [[TMP5]] to <8 x i32> +; CHECK-NEXT:[[TMP7:%.*]] = call <8 x i32> @llvm.umax.v8i32(<8 x i32> [[TMP6]], <8 x i32> ) +; CHECK-NEXT:[[TMP8:%.*]] = trunc <8 x i32> [[TMP7]] to <8 x i8> +; CHECK-NEXT:[[TMP9:%.*]] = add <8 x i8> [[TMP8]], +; CHECK-NEXT:[[TMP10:%.*]] = select <8 x i1> [[TMP3]], <8 x i8> [[TMP9]], <8 x i8> [[TMP5]] +; CHECK-NEXT:store <8 x i8> [[TMP10]], ptr [[TMP0]], align 1 +; CHECK-NEXT:br label [[PRE]] +; +entry: + %idx11 = getelementptr i8, ptr %0, i64 1 + %idx22 = getelementptr i8, ptr %0, i64 2 + %idx33 = getelementptr i8, ptr %0, i64 3 + %idx44 = getelementptr i8, ptr %0, i64 4 + %idx55 = getelementptr i8, ptr %0, i64 5 + %idx66 = getelementptr i8, ptr %0, i64 6 + %idx77 = getelementptr i8, ptr %0, i64 7 + br label %pre + +pre: + %conv.i = zext i8 %1 to i32 + %2 = tail call i32 @llvm.umax.i32(i32 %conv.i, i32 1) + %.sroa.speculated.i = add i32 %2, 1 + %intensity.0.i = select i1 %cmp12.i, i32 %.sroa.speculated.i, i32 %conv.i + %conv14.i = trunc i32 %intensity.0.i to i8 + store i8 %conv14.i, ptr %0, align 1 + %conv.i.1 = zext i8 %1 to i32 + %3 = tail call i32 @llvm.umax.i32(i32
[llvm-branch-commits] [flang] [Flang][OpenMP][MLIR] Initial derived type member map support (PR #82853)
agozillon wrote: Small ping for review of this PR stack if possible. I'll be away on vacation for two and a half weeks, but it would be excellent to come back to review comments on this PR stack that I can address and then make some forward progress on. No rush obviously as there will be a 2 week time lag before it's all addressed but reviews would be appreciated! https://github.com/llvm/llvm-project/pull/82853 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/18.x [X86_64] fix SSE type error in vaarg (PR #86698)
llvmbot wrote: @llvm/pr-subscribers-backend-x86 Author: AtariDreams (AtariDreams) Changes tweak the position of the ++neededSSE when Lo is NoClass and Hi is SSE. Fix #86371. (cherry picked from commit 9c8dd5e6f6bd93deb95de9642632223f54a18a11) --- Full diff: https://github.com/llvm/llvm-project/pull/86698.diff 3 Files Affected: - (modified) clang/lib/CodeGen/Targets/X86.cpp (+1-2) - (added) clang/test/CodeGen/X86/x86_64-vaarg.c (+69) - (added) clang/test/CodeGenCXX/x86_64-vaarg.cpp (+67) ``diff diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp index 2291c991fb1107..60f986d9ef50e0 100644 --- a/clang/lib/CodeGen/Targets/X86.cpp +++ b/clang/lib/CodeGen/Targets/X86.cpp @@ -2788,12 +2788,11 @@ X86_64ABIInfo::classifyArgumentType(QualType Ty, unsigned freeIntRegs, // memory), except in situations involving unions. case X87Up: case SSE: +++neededSSE; HighPart = GetSSETypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8); if (Lo == NoClass) // Pass HighPart at offset 8 in memory. return ABIArgInfo::getDirect(HighPart, 8); - -++neededSSE; break; // AMD64-ABI 3.2.3p3: Rule 4. If the class is SSEUP, the diff --git a/clang/test/CodeGen/X86/x86_64-vaarg.c b/clang/test/CodeGen/X86/x86_64-vaarg.c new file mode 100644 index 00..a18ba836423881 --- /dev/null +++ b/clang/test/CodeGen/X86/x86_64-vaarg.c @@ -0,0 +1,69 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s + + +typedef struct { struct {} a; } empty; + +// CHECK-LABEL: define dso_local void @empty_record_test( +// CHECK-SAME: i32 noundef [[Z:%.*]], ...) 
#[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT:[[RETVAL:%.*]] = alloca [[STRUCT_EMPTY:%.*]], align 1 +// CHECK-NEXT:[[Z_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT:[[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +// CHECK-NEXT:[[TMP:%.*]] = alloca [[STRUCT_EMPTY]], align 1 +// CHECK-NEXT:store i32 [[Z]], ptr [[Z_ADDR]], align 4 +// CHECK-NEXT:[[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 +// CHECK-NEXT:call void @llvm.va_start(ptr [[ARRAYDECAY]]) +// CHECK-NEXT:[[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 +// CHECK-NEXT:call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[RETVAL]], ptr align 1 [[TMP]], i64 0, i1 false) +// CHECK-NEXT:ret void +// +empty empty_record_test(int z, ...) { + __builtin_va_list list; + __builtin_va_start(list, z); + return __builtin_va_arg(list, empty); +} + +typedef struct { + struct{} a; + double b; +} s1; + +// CHECK-LABEL: define dso_local double @f( +// CHECK-SAME: i32 noundef [[Z:%.*]], ...) 
#[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT:[[RETVAL:%.*]] = alloca [[STRUCT_S1:%.*]], align 8 +// CHECK-NEXT:[[Z_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT:[[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +// CHECK-NEXT:store i32 [[Z]], ptr [[Z_ADDR]], align 4 +// CHECK-NEXT:[[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 +// CHECK-NEXT:call void @llvm.va_start(ptr [[ARRAYDECAY]]) +// CHECK-NEXT:[[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 +// CHECK-NEXT:[[FP_OFFSET_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY1]], i32 0, i32 1 +// CHECK-NEXT:[[FP_OFFSET:%.*]] = load i32, ptr [[FP_OFFSET_P]], align 4 +// CHECK-NEXT:[[FITS_IN_FP:%.*]] = icmp ule i32 [[FP_OFFSET]], 160 +// CHECK-NEXT:br i1 [[FITS_IN_FP]], label [[VAARG_IN_REG:%.*]], label [[VAARG_IN_MEM:%.*]] +// CHECK: vaarg.in_reg: +// CHECK-NEXT:[[TMP0:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 3 +// CHECK-NEXT:[[REG_SAVE_AREA:%.*]] = load ptr, ptr [[TMP0]], align 16 +// CHECK-NEXT:[[TMP1:%.*]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i32 [[FP_OFFSET]] +// CHECK-NEXT:[[TMP2:%.*]] = add i32 [[FP_OFFSET]], 16 +// CHECK-NEXT:store i32 [[TMP2]], ptr [[FP_OFFSET_P]], align 4 +// CHECK-NEXT:br label [[VAARG_END:%.*]] +// CHECK: vaarg.in_mem: +// CHECK-NEXT:[[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 2 +// CHECK-NEXT:[[OVERFLOW_ARG_AREA:%.*]] = load ptr, ptr [[OVERFLOW_ARG_AREA_P]], align 8 +// CHECK-NEXT:[[OVERFLOW_ARG_AREA_NEXT:%.*]] = getelementptr i8, ptr [[OVERFLOW_ARG_AREA]], i32 8 +// CHECK-NEXT:store ptr [[OVERFLOW_ARG_AREA_NEXT]], ptr [[OVERFLOW_ARG_AREA_P]], align 8 +// CHECK-NEXT:br label [[VAARG_END]] +// CHECK: vaarg.end: +// CHECK-NEXT:[[VAARG_ADDR:%.*]] = phi ptr [ [[TMP1]], [[VAARG_IN_REG]] ], [ [[OVERFLOW_ARG_AREA]], [[V
[llvm-branch-commits] [clang] release/18.x [X86_64] fix SSE type error in vaarg (PR #86698)
https://github.com/AtariDreams created https://github.com/llvm/llvm-project/pull/86698 tweak the position of the ++neededSSE when Lo is NoClass and Hi is SSE. Fix #86371. (cherry picked from commit 9c8dd5e6f6bd93deb95de9642632223f54a18a11) >From 1e355856dda8f67f5431ac3d9000ce8ceef25743 Mon Sep 17 00:00:00 2001 From: Longsheng Mou Date: Tue, 26 Mar 2024 09:19:42 +0800 Subject: [PATCH] [X86_64] fix SSE type error in vaarg. (#86377) tweak the position of the ++neededSSE when Lo is NoClass and Hi is SSE. Fix #86371. (cherry picked from commit 9c8dd5e6f6bd93deb95de9642632223f54a18a11) --- clang/lib/CodeGen/Targets/X86.cpp | 3 +- clang/test/CodeGen/X86/x86_64-vaarg.c | 69 ++ clang/test/CodeGenCXX/x86_64-vaarg.cpp | 67 + 3 files changed, 137 insertions(+), 2 deletions(-) create mode 100644 clang/test/CodeGen/X86/x86_64-vaarg.c create mode 100644 clang/test/CodeGenCXX/x86_64-vaarg.cpp diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp index 2291c991fb1107..60f986d9ef50e0 100644 --- a/clang/lib/CodeGen/Targets/X86.cpp +++ b/clang/lib/CodeGen/Targets/X86.cpp @@ -2788,12 +2788,11 @@ X86_64ABIInfo::classifyArgumentType(QualType Ty, unsigned freeIntRegs, // memory), except in situations involving unions. case X87Up: case SSE: +++neededSSE; HighPart = GetSSETypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8); if (Lo == NoClass) // Pass HighPart at offset 8 in memory. return ABIArgInfo::getDirect(HighPart, 8); - -++neededSSE; break; // AMD64-ABI 3.2.3p3: Rule 4. 
If the class is SSEUP, the diff --git a/clang/test/CodeGen/X86/x86_64-vaarg.c b/clang/test/CodeGen/X86/x86_64-vaarg.c new file mode 100644 index 00..a18ba836423881 --- /dev/null +++ b/clang/test/CodeGen/X86/x86_64-vaarg.c @@ -0,0 +1,69 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s + + +typedef struct { struct {} a; } empty; + +// CHECK-LABEL: define dso_local void @empty_record_test( +// CHECK-SAME: i32 noundef [[Z:%.*]], ...) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT:[[RETVAL:%.*]] = alloca [[STRUCT_EMPTY:%.*]], align 1 +// CHECK-NEXT:[[Z_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT:[[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +// CHECK-NEXT:[[TMP:%.*]] = alloca [[STRUCT_EMPTY]], align 1 +// CHECK-NEXT:store i32 [[Z]], ptr [[Z_ADDR]], align 4 +// CHECK-NEXT:[[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 +// CHECK-NEXT:call void @llvm.va_start(ptr [[ARRAYDECAY]]) +// CHECK-NEXT:[[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 +// CHECK-NEXT:call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[RETVAL]], ptr align 1 [[TMP]], i64 0, i1 false) +// CHECK-NEXT:ret void +// +empty empty_record_test(int z, ...) { + __builtin_va_list list; + __builtin_va_start(list, z); + return __builtin_va_arg(list, empty); +} + +typedef struct { + struct{} a; + double b; +} s1; + +// CHECK-LABEL: define dso_local double @f( +// CHECK-SAME: i32 noundef [[Z:%.*]], ...) 
#[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT:[[RETVAL:%.*]] = alloca [[STRUCT_S1:%.*]], align 8 +// CHECK-NEXT:[[Z_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT:[[LIST:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +// CHECK-NEXT:store i32 [[Z]], ptr [[Z_ADDR]], align 4 +// CHECK-NEXT:[[ARRAYDECAY:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 +// CHECK-NEXT:call void @llvm.va_start(ptr [[ARRAYDECAY]]) +// CHECK-NEXT:[[ARRAYDECAY1:%.*]] = getelementptr inbounds [1 x %struct.__va_list_tag], ptr [[LIST]], i64 0, i64 0 +// CHECK-NEXT:[[FP_OFFSET_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG:%.*]], ptr [[ARRAYDECAY1]], i32 0, i32 1 +// CHECK-NEXT:[[FP_OFFSET:%.*]] = load i32, ptr [[FP_OFFSET_P]], align 4 +// CHECK-NEXT:[[FITS_IN_FP:%.*]] = icmp ule i32 [[FP_OFFSET]], 160 +// CHECK-NEXT:br i1 [[FITS_IN_FP]], label [[VAARG_IN_REG:%.*]], label [[VAARG_IN_MEM:%.*]] +// CHECK: vaarg.in_reg: +// CHECK-NEXT:[[TMP0:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 3 +// CHECK-NEXT:[[REG_SAVE_AREA:%.*]] = load ptr, ptr [[TMP0]], align 16 +// CHECK-NEXT:[[TMP1:%.*]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i32 [[FP_OFFSET]] +// CHECK-NEXT:[[TMP2:%.*]] = add i32 [[FP_OFFSET]], 16 +// CHECK-NEXT:store i32 [[TMP2]], ptr [[FP_OFFSET_P]], align 4 +// CHECK-NEXT:br label [[VAARG_END:%.*]] +// CHECK: vaarg.in_mem: +// CHECK-NEXT:[[OVERFLOW_ARG_AREA_P:%.*]] = getelementptr inbounds [[STRUCT___VA_LIST_TAG]], ptr [[ARRAYDECAY1]], i32 0, i32 2 +// CHECK-NEXT:[[OVERFLOW_ARG_AREA:
[llvm-branch-commits] [llvm] release/18.x [SelectionDAG] Prevent combination on inconsistent type in 'carryDiamond' (PR #86697)
llvmbot wrote: @llvm/pr-subscribers-backend-x86 Author: AtariDreams (AtariDreams) Changes Backport of commit https://github.com/llvm/llvm-project/commit/cb4453dc69d75064c9a82e9a6a9bf0d0ded4b204 --- Full diff: https://github.com/llvm/llvm-project/pull/86697.diff 2 Files Affected: - (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+6-1) - (modified) llvm/test/CodeGen/X86/addcarry.ll (+23) ``diff diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 3135ec73a99e76..e806e0f0731f23 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3575,6 +3575,11 @@ static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI, return SDValue(); if (Opcode != ISD::UADDO && Opcode != ISD::USUBO) return SDValue(); + // Guarantee identical type of CarryOut + EVT CarryOutType = N->getValueType(0); + if (CarryOutType != Carry0.getValue(1).getValueType() || + CarryOutType != Carry1.getValue(1).getValueType()) +return SDValue(); // Canonicalize the add/sub of A and B (the top node in the above ASCII art) // as Carry0 and the add/sub of the carry in as Carry1 (the middle node). 
@@ -3622,7 +3627,7 @@ static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI, // TODO: match other operations that can merge flags (ADD, etc) DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0)); if (N->getOpcode() == ISD::AND) -return DAG.getConstant(0, DL, MVT::i1); +return DAG.getConstant(0, DL, CarryOutType); return Merged.getValue(1); } diff --git a/llvm/test/CodeGen/X86/addcarry.ll b/llvm/test/CodeGen/X86/addcarry.ll index 3fc4ed99fad0fa..f8d32fc2d29252 100644 --- a/llvm/test/CodeGen/X86/addcarry.ll +++ b/llvm/test/CodeGen/X86/addcarry.ll @@ -1490,3 +1490,26 @@ define { i64, i64 } @addcarry_commutative_2(i64 %x0, i64 %x1, i64 %y0, i64 %y1) %r1 = insertvalue { i64, i64 } %r0, i64 %b1s, 1 ret { i64, i64 } %r1 } + +define i1 @pr84831(i64 %arg) { +; CHECK-LABEL: pr84831: +; CHECK: # %bb.0: +; CHECK-NEXT:testq %rdi, %rdi +; CHECK-NEXT:setne %al +; CHECK-NEXT:xorl %ecx, %ecx +; CHECK-NEXT:addb $-1, %al +; CHECK-NEXT:adcq $1, %rcx +; CHECK-NEXT:setb %al +; CHECK-NEXT:retq + %a = icmp ult i64 0, %arg + %add1 = add i64 0, 1 + %carryout1 = icmp ult i64 %add1, 0 + %b = zext i1 %a to i64 + %add2 = add i64 %add1, %b + %carryout2 = icmp ult i64 %add2, %add1 + %zc1 = zext i1 %carryout1 to i63 + %zc2 = zext i1 %carryout2 to i63 + %or = or i63 %zc1, %zc2 + %trunc = trunc i63 %or to i1 + ret i1 %trunc +} `` https://github.com/llvm/llvm-project/pull/86697 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/18.x [SelectionDAG] Prevent combination on inconsistent type in 'carryDiamond' (PR #86697)
https://github.com/AtariDreams edited https://github.com/llvm/llvm-project/pull/86697 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/18.x [SelectionDAG] Prevent combination on inconsistent type in 'carryDiamond' (#84888) (PR #86697)
https://github.com/AtariDreams edited https://github.com/llvm/llvm-project/pull/86697 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/18.x [SelectionDAG] Prevent combination on inconsistent type in `carryDiamond` (#84888) (PR #86697)
https://github.com/AtariDreams created https://github.com/llvm/llvm-project/pull/86697 Backport of commit https://github.com/llvm/llvm-project/commit/cb4453dc69d75064c9a82e9a6a9bf0d0ded4b204 >From 0edb553acc9cdfa0b3b94732ea7db05829090eaf Mon Sep 17 00:00:00 2001 From: XChy Date: Fri, 22 Mar 2024 18:35:20 +0800 Subject: [PATCH] [SelectionDAG] Prevent combination on inconsistent type in `combineCarryDiamond` (#84888) Fixes #84831 When matching carry pattern with `getAsCarry`, it may produce different type of carryout. This patch checks such case and does early exit. I'm new to DAG, any suggestion is appreciated. (cherry picked from commit cb4453dc69d75064c9a82e9a6a9bf0d0ded4b204) --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 7 +- llvm/test/CodeGen/X86/addcarry.ll | 23 +++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 3135ec73a99e76..e806e0f0731f23 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3575,6 +3575,11 @@ static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI, return SDValue(); if (Opcode != ISD::UADDO && Opcode != ISD::USUBO) return SDValue(); + // Guarantee identical type of CarryOut + EVT CarryOutType = N->getValueType(0); + if (CarryOutType != Carry0.getValue(1).getValueType() || + CarryOutType != Carry1.getValue(1).getValueType()) +return SDValue(); // Canonicalize the add/sub of A and B (the top node in the above ASCII art) // as Carry0 and the add/sub of the carry in as Carry1 (the middle node). 
@@ -3622,7 +3627,7 @@ static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI, // TODO: match other operations that can merge flags (ADD, etc) DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0)); if (N->getOpcode() == ISD::AND) -return DAG.getConstant(0, DL, MVT::i1); +return DAG.getConstant(0, DL, CarryOutType); return Merged.getValue(1); } diff --git a/llvm/test/CodeGen/X86/addcarry.ll b/llvm/test/CodeGen/X86/addcarry.ll index 3fc4ed99fad0fa..f8d32fc2d29252 100644 --- a/llvm/test/CodeGen/X86/addcarry.ll +++ b/llvm/test/CodeGen/X86/addcarry.ll @@ -1490,3 +1490,26 @@ define { i64, i64 } @addcarry_commutative_2(i64 %x0, i64 %x1, i64 %y0, i64 %y1) %r1 = insertvalue { i64, i64 } %r0, i64 %b1s, 1 ret { i64, i64 } %r1 } + +define i1 @pr84831(i64 %arg) { +; CHECK-LABEL: pr84831: +; CHECK: # %bb.0: +; CHECK-NEXT:testq %rdi, %rdi +; CHECK-NEXT:setne %al +; CHECK-NEXT:xorl %ecx, %ecx +; CHECK-NEXT:addb $-1, %al +; CHECK-NEXT:adcq $1, %rcx +; CHECK-NEXT:setb %al +; CHECK-NEXT:retq + %a = icmp ult i64 0, %arg + %add1 = add i64 0, 1 + %carryout1 = icmp ult i64 %add1, 0 + %b = zext i1 %a to i64 + %add2 = add i64 %add1, %b + %carryout2 = icmp ult i64 %add2, %add1 + %zc1 = zext i1 %carryout1 to i63 + %zc2 = zext i1 %carryout2 to i63 + %or = or i63 %zc1, %zc2 + %trunc = trunc i63 %or to i1 + ret i1 %trunc +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [Analysis] Exclude llvm.allow.{runtime, ubsan}.check() from AliasSetTracker (PR #86065)
https://github.com/aeubanks approved this pull request. https://github.com/llvm/llvm-project/pull/86065 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [openmp] release/18x: [OpenMP][AIX] Affinity implementation for AIX (#84984) (PR #86695)
shiltian wrote: I think it's fine. WDYT? @tstellar https://github.com/llvm/llvm-project/pull/86695 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [openmp] release/18x: [OpenMP][AIX] Affinity implementation for AIX (#84984) (PR #86695)
https://github.com/xingxue-ibm created https://github.com/llvm/llvm-project/pull/86695 This patch implements `affinity` for AIX, which is quite different from platforms such as Linux. - Setting CPU affinity through masks and related functions are not supported. System call `bindprocessor()` is used to bind a thread to one CPU per call. - There are no system routines to get the affinity info of a thread. The implementation of `get_system_affinity()` for AIX gets the mask of all available CPUs, to be used as the full mask only. - Topology is not available from the file system. It is obtained through system SRAD (Scheduler Resource Allocation Domain). This patch has run through the libomp LIT tests successfully with `affinity` enabled. (cherry picked from commit d394f3a162b871668d0c8e8bf6a94922fa8698ae) **Note**: We need to include this patch in the libomp of the next IBM OpenXL for AIX compiler deliverable which is built off LLVM v18. This cherry-pick has been re-based on `release/18.x`. Changes in this patch are guarded for AIX and should not affect other platforms. >From 1158e9e92b4e53fbb07328b629d136cc47c1229f Mon Sep 17 00:00:00 2001 From: Xing Xue Date: Fri, 22 Mar 2024 15:25:08 -0400 Subject: [PATCH] [OpenMP][AIX] Affinity implementation for AIX (#84984) This patch implements `affinity` for AIX, which is quite different from platforms such as Linux. - Setting CPU affinity through masks and related functions are not supported. System call `bindprocessor()` is used to bind a thread to one CPU per call. - There are no system routines to get the affinity info of a thread. The implementation of `get_system_affinity()` for AIX gets the mask of all available CPUs, to be used as the full mask only. - Topology is not available from the file system. It is obtained through system SRAD (Scheduler Resource Allocation Domain). This patch has run through the libomp LIT tests successfully with `affinity` enabled. 
(cherry picked from commit d394f3a162b871668d0c8e8bf6a94922fa8698ae) --- openmp/runtime/src/kmp.h| 4 +- openmp/runtime/src/kmp_affinity.cpp | 129 ++-- openmp/runtime/src/kmp_affinity.h | 73 +++- openmp/runtime/src/kmp_os.h | 3 +- openmp/runtime/src/z_Linux_util.cpp | 37 ++-- openmp/runtime/test/lit.cfg | 2 +- 6 files changed, 229 insertions(+), 19 deletions(-) diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index e3a1e20731bbe0..46ee4c9fba7109 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -818,6 +818,7 @@ class KMPAffinity { typedef KMPAffinity::Mask kmp_affin_mask_t; extern KMPAffinity *__kmp_affinity_dispatch; +#ifndef KMP_OS_AIX class kmp_affinity_raii_t { kmp_affin_mask_t *mask; bool restored; @@ -842,6 +843,7 @@ class kmp_affinity_raii_t { } ~kmp_affinity_raii_t() { restore(); } }; +#endif // !KMP_OS_AIX // Declare local char buffers with this size for printing debug and info // messages, using __kmp_affinity_print_mask(). @@ -3911,7 +3913,7 @@ extern void __kmp_balanced_affinity(kmp_info_t *th, int team_size); #if KMP_WEIGHTED_ITERATIONS_SUPPORTED extern int __kmp_get_first_osid_with_ecore(void); #endif -#if KMP_OS_LINUX || KMP_OS_FREEBSD +#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_AIX extern int kmp_set_thread_affinity_mask_initial(void); #endif static inline void __kmp_assign_root_init_mask() { diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp index 6a41d34b023729..1ac541fbcaa707 100644 --- a/openmp/runtime/src/kmp_affinity.cpp +++ b/openmp/runtime/src/kmp_affinity.cpp @@ -2906,12 +2906,17 @@ static inline const char *__kmp_cpuinfo_get_envvar() { } // Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the -// affinity map. +// affinity map. On AIX, the map is obtained through system SRAD (Scheduler +// Resource Allocation Domain). 
static bool __kmp_affinity_create_cpuinfo_map(int *line, kmp_i18n_id_t *const msg_id) { + *msg_id = kmp_i18n_null; + +#if KMP_OS_AIX + unsigned num_records = __kmp_xproc; +#else const char *filename = __kmp_cpuinfo_get_filename(); const char *envvar = __kmp_cpuinfo_get_envvar(); - *msg_id = kmp_i18n_null; if (__kmp_affinity.flags.verbose) { KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename); @@ -2970,6 +2975,7 @@ static bool __kmp_affinity_create_cpuinfo_map(int *line, *msg_id = kmp_i18n_str_CantRewindCpuinfo; return false; } +#endif // KMP_OS_AIX // Allocate the array of records to store the proc info in. The dummy // element at the end makes the logic in filling them out easier to code. @@ -2999,6 +3005,99 @@ static bool __kmp_affinity_create_cpuinfo_map(int *line, INIT_PROC_INFO(threadInfo[i]); } +#if KMP_OS_AIX + int smt_threads; + lpar_info_format1_t cpuinfo; + unsigned num_avail = __kmp_xproc; + + if (__kmp_affinity.flags.ver
[llvm-branch-commits] [openmp] release/18x: [OpenMP][AIX] Affinity implementation for AIX (#84984) (PR #86695)
https://github.com/xingxue-ibm milestoned https://github.com/llvm/llvm-project/pull/86695 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] release/18.x: [ODS][NFC] Cast range.size() to int32_t in accumulation (#85629) (PR #86677)
https://github.com/joker-eph approved this pull request. https://github.com/llvm/llvm-project/pull/86677 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] release/18.x: [lld/ELF][X86] Respect outSecOff when checking if GOTPCREL can be relaxed (#86334) (PR #86688)
llvmbot wrote: @llvm/pr-subscribers-lld Author: None (llvmbot) Changes Backport 48048051323d5dd74057dc5f32df8c3c323afcd5 Requested by: @EugeneZelenko --- Full diff: https://github.com/llvm/llvm-project/pull/86688.diff 2 Files Affected: - (modified) lld/ELF/Arch/X86_64.cpp (+4-3) - (modified) lld/test/ELF/x86-64-gotpc-relax-too-far.s (+11-1) ``diff diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp index de459013595fed..a85bf3aa0c09d1 100644 --- a/lld/ELF/Arch/X86_64.cpp +++ b/lld/ELF/Arch/X86_64.cpp @@ -328,9 +328,10 @@ bool X86_64::relaxOnce(int pass) const { if (rel.expr != R_RELAX_GOT_PC) continue; -uint64_t v = sec->getRelocTargetVA( -sec->file, rel.type, rel.addend, -sec->getOutputSection()->addr + rel.offset, *rel.sym, rel.expr); +uint64_t v = sec->getRelocTargetVA(sec->file, rel.type, rel.addend, + sec->getOutputSection()->addr + + sec->outSecOff + rel.offset, + *rel.sym, rel.expr); if (isInt<32>(v)) continue; if (rel.sym->auxIdx == 0) { diff --git a/lld/test/ELF/x86-64-gotpc-relax-too-far.s b/lld/test/ELF/x86-64-gotpc-relax-too-far.s index 74aa6d8f65a0d8..ba41faab67de5c 100644 --- a/lld/test/ELF/x86-64-gotpc-relax-too-far.s +++ b/lld/test/ELF/x86-64-gotpc-relax-too-far.s @@ -5,7 +5,10 @@ # RUN: llvm-objdump --no-print-imm-hex -d %t/bin | FileCheck --check-prefix=DISASM %s # RUN: llvm-readelf -S %t/bin | FileCheck --check-prefixes=GOT %s # RUN: ld.lld -T %t/lds2 %t/a.o -o %t/bin2 -# RUN: llvm-readelf -S %t/bin2 | FileCheck --check-prefixes=UNNECESSARY-GOT %s +# RUN: llvm-objdump --no-print-imm-hex -d %t/bin2 | FileCheck --check-prefix=DISASM %s +# RUN: llvm-readelf -S %t/bin2 | FileCheck --check-prefixes=GOT %s +# RUN: ld.lld -T %t/lds3 %t/a.o -o %t/bin3 +# RUN: llvm-readelf -S %t/bin3 | FileCheck --check-prefixes=UNNECESSARY-GOT %s # DISASM: <_foo>: # DISASM-NEXT: movl2097146(%rip), %eax @@ -47,6 +50,13 @@ SECTIONS { data 0x8020 : { *(data) } } #--- lds2 +SECTIONS { + .text.foo 0x10 : { *(.text.foo) } + .text 0x1ff000 : { . = . 
+ 0x1000 ; *(.text) } + .got 0x30 : { *(.got) } + data 0x8020 : { *(data) } +} +#--- lds3 SECTIONS { .text.foo 0x10 : { *(.text.foo) } .text 0x20 : { *(.text) } `` https://github.com/llvm/llvm-project/pull/86688 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] release/18.x: [lld/ELF][X86] Respect outSecOff when checking if GOTPCREL can be relaxed (#86334) (PR #86688)
llvmbot wrote: @MaskRay What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/86688 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] release/18.x: [lld/ELF][X86] Respect outSecOff when checking if GOTPCREL can be relaxed (#86334) (PR #86688)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/86688 Backport 48048051323d5dd74057dc5f32df8c3c323afcd5 Requested by: @EugeneZelenko >From 439503ebeb126b44b7fcc8fa73a6912216074745 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Sun, 24 Mar 2024 11:43:15 -0600 Subject: [PATCH] [lld/ELF][X86] Respect outSecOff when checking if GOTPCREL can be relaxed (#86334) The existing implementation didn't handle when the input text section was some offset from the output section. This resulted in an assert in relaxGot() with an lld built with asserts for some large binaries, or even worse, a silently broken binary with an lld without asserts. (cherry picked from commit 48048051323d5dd74057dc5f32df8c3c323afcd5) --- lld/ELF/Arch/X86_64.cpp | 7 --- lld/test/ELF/x86-64-gotpc-relax-too-far.s | 12 +++- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp index de459013595fed..a85bf3aa0c09d1 100644 --- a/lld/ELF/Arch/X86_64.cpp +++ b/lld/ELF/Arch/X86_64.cpp @@ -328,9 +328,10 @@ bool X86_64::relaxOnce(int pass) const { if (rel.expr != R_RELAX_GOT_PC) continue; -uint64_t v = sec->getRelocTargetVA( -sec->file, rel.type, rel.addend, -sec->getOutputSection()->addr + rel.offset, *rel.sym, rel.expr); +uint64_t v = sec->getRelocTargetVA(sec->file, rel.type, rel.addend, + sec->getOutputSection()->addr + + sec->outSecOff + rel.offset, + *rel.sym, rel.expr); if (isInt<32>(v)) continue; if (rel.sym->auxIdx == 0) { diff --git a/lld/test/ELF/x86-64-gotpc-relax-too-far.s b/lld/test/ELF/x86-64-gotpc-relax-too-far.s index 74aa6d8f65a0d8..ba41faab67de5c 100644 --- a/lld/test/ELF/x86-64-gotpc-relax-too-far.s +++ b/lld/test/ELF/x86-64-gotpc-relax-too-far.s @@ -5,7 +5,10 @@ # RUN: llvm-objdump --no-print-imm-hex -d %t/bin | FileCheck --check-prefix=DISASM %s # RUN: llvm-readelf -S %t/bin | FileCheck --check-prefixes=GOT %s # RUN: ld.lld -T %t/lds2 %t/a.o -o %t/bin2 -# RUN: llvm-readelf -S %t/bin2 | FileCheck 
--check-prefixes=UNNECESSARY-GOT %s +# RUN: llvm-objdump --no-print-imm-hex -d %t/bin2 | FileCheck --check-prefix=DISASM %s +# RUN: llvm-readelf -S %t/bin2 | FileCheck --check-prefixes=GOT %s +# RUN: ld.lld -T %t/lds3 %t/a.o -o %t/bin3 +# RUN: llvm-readelf -S %t/bin3 | FileCheck --check-prefixes=UNNECESSARY-GOT %s # DISASM: <_foo>: # DISASM-NEXT: movl2097146(%rip), %eax @@ -47,6 +50,13 @@ SECTIONS { data 0x8020 : { *(data) } } #--- lds2 +SECTIONS { + .text.foo 0x10 : { *(.text.foo) } + .text 0x1ff000 : { . = . + 0x1000 ; *(.text) } + .got 0x30 : { *(.got) } + data 0x8020 : { *(data) } +} +#--- lds3 SECTIONS { .text.foo 0x10 : { *(.text.foo) } .text 0x20 : { *(.text) } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] release/18.x: [lld/ELF][X86] Respect outSecOff when checking if GOTPCREL can be relaxed (#86334) (PR #86688)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/86688 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [runtimes] Allow building against an installed LLVM tree (PR #86209)
@@ -218,6 +218,22 @@ foreach(entry ${runtimes}) endforeach() if(LLVM_INCLUDE_TESTS) + # Add lit if needed before adding any runtimes since their CMake tests + # configuration might depend on lit being present. + if (NOT HAVE_LLVM_LIT) +# If built by manually invoking cmake on this directory, we don't have +# llvm-lit. If invoked via llvm/runtimes, the toplevel llvm cmake +# invocation already generated the llvm-lit script. +set(LLVM_LIT_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/bin) +add_subdirectory(${LLVM_MAIN_SRC_DIR}/utils/llvm-lit + ${CMAKE_CURRENT_BINARY_DIR}/llvm-lit) +# Ensure that the testsuites use the local lit rather than +# LLVM_INSTALL_DIR/bin/llvm-lit (which may not exist if LLVM_BINARY_DIR +# points at an installed LLVM tree rather than a build tree. +get_llvm_lit_path(_base_dir _file_name) +set(LLVM_EXTERNAL_LIT "${_base_dir}/${_file_name}" CACHE STRING "Command used to spawn lit" FORCE) ldionne wrote: Ah. I missed the distinction between `llvm/runtimes` and just `runtimes`. This could be made clearer by saying something like ``` If built with the runtimes build (rooted at runtimes/CMakeLists.txt), we don't have llvm-lit. If built with the bootstrapping build (rooted at llvm/CMakeLists.txt), the top-level llvm CMake invocation already generated the llvm-lit script. ``` https://github.com/llvm/llvm-project/pull/86209 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [runtimes] Allow building against an installed LLVM tree (PR #86209)
@@ -218,6 +218,22 @@ foreach(entry ${runtimes}) endforeach() if(LLVM_INCLUDE_TESTS) + # Add lit if needed before adding any runtimes since their CMake tests + # configuration might depend on lit being present. + if (NOT HAVE_LLVM_LIT) +# If built by manually invoking cmake on this directory, we don't have +# llvm-lit. If invoked via llvm/runtimes, the toplevel llvm cmake +# invocation already generated the llvm-lit script. +set(LLVM_LIT_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/bin) +add_subdirectory(${LLVM_MAIN_SRC_DIR}/utils/llvm-lit + ${CMAKE_CURRENT_BINARY_DIR}/llvm-lit) +# Ensure that the testsuites use the local lit rather than +# LLVM_INSTALL_DIR/bin/llvm-lit (which may not exist if LLVM_BINARY_DIR +# points at an installed LLVM tree rather than a build tree. +get_llvm_lit_path(_base_dir _file_name) +set(LLVM_EXTERNAL_LIT "${_base_dir}/${_file_name}" CACHE STRING "Command used to spawn lit" FORCE) arichardson wrote: This should be explained by the existing comment: > If built by manually invoking cmake on this directory, we don't have > llvm-lit. If invoked via llvm/runtimes, the toplevel llvm cmake > invocation already generated the llvm-lit script. https://github.com/llvm/llvm-project/pull/86209 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][OpenMP] Support tasks' implicit firstprivate DSA (PR #85989)
luporl wrote: @NimishMishra Thanks for the review! It seems the buildbot errors on Windows are all of this type: `fatal error C1002: compiler is out of heap space in pass 2`. > Is this merge not in main? It is not. It depends on https://github.com/llvm/llvm-project/pull/85978, that depends on https://github.com/llvm/llvm-project/pull/72510. As tasks' implicit firstprivate happens only when no default clause is specified, I thought it would be better to start from the default clause fix, in https://github.com/llvm/llvm-project/pull/72510. Actually, https://github.com/llvm/llvm-project/pull/78283 is also needed to avoid issues with threadprivate. https://github.com/llvm/llvm-project/pull/85989 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] release/18.x: [ODS][NFC] Cast range.size() to int32_t in accumulation (#85629) (PR #86677)
llvmbot wrote: @llvm/pr-subscribers-mlir Author: None (llvmbot) Changes Backport bce17034157fdfe4d898d30366c1eeca3442fa3d Requested by: @EugeneZelenko --- Full diff: https://github.com/llvm/llvm-project/pull/86677.diff 1 Files Affected: - (modified) mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp (+1-1) ``diff diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp index 71326049af0579..7f748cfbd31ad4 100644 --- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp +++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp @@ -3058,7 +3058,7 @@ void OpEmitter::genCodeForAddingArgAndRegionForBuilder( body << llvm::formatv( "static_cast(std::accumulate({0}.begin(), {0}.end(), 0, " "[](int32_t curSum, ::mlir::ValueRange range) {{ return curSum + " -"range.size(); }))", +"static_cast(range.size()); }))", operandName); } else { body << "static_cast(" << getArgumentName(op, i) << ".size())"; `` https://github.com/llvm/llvm-project/pull/86677 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] release/18.x: [ODS][NFC] Cast range.size() to int32_t in accumulation (#85629) (PR #86677)
llvmbot wrote: @joker-eph What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/86677 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] release/18.x: [ODS][NFC] Cast range.size() to int32_t in accumulation (#85629) (PR #86677)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/86677 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] release/18.x: [ODS][NFC] Cast range.size() to int32_t in accumulation (#85629) (PR #86677)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/86677 Backport bce17034157fdfe4d898d30366c1eeca3442fa3d Requested by: @EugeneZelenko >From 6b3fe351c9abe9fa9e97ee26628fc1a99fe47e12 Mon Sep 17 00:00:00 2001 From: Andrei Golubev Date: Mon, 25 Mar 2024 23:47:39 +0200 Subject: [PATCH] [ODS][NFC] Cast range.size() to int32_t in accumulation (#85629) Using range.size() "as is" means we accumulate 'size_t' values into 'int32_t' variable. This may produce narrowing conversion warnings (particularly, on MSVC). The surrounding code seems to cast .size() to 'int32_t' so following this practice seems safe enough. Co-authored-by: Ovidiu Pintican (cherry picked from commit bce17034157fdfe4d898d30366c1eeca3442fa3d) --- mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp index 71326049af0579..7f748cfbd31ad4 100644 --- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp +++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp @@ -3058,7 +3058,7 @@ void OpEmitter::genCodeForAddingArgAndRegionForBuilder( body << llvm::formatv( "static_cast(std::accumulate({0}.begin(), {0}.end(), 0, " "[](int32_t curSum, ::mlir::ValueRange range) {{ return curSum + " -"range.size(); }))", +"static_cast(range.size()); }))", operandName); } else { body << "static_cast(" << getArgumentName(op, i) << ".size())"; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [RISCV] Use larger copies when register tuples are aligned (PR #84455)
@@ -212,19 +185,13 @@ body: | ; CHECK-NEXT: $v7 = VMV1R_V $v14 ; CHECK-NEXT: $v8 = VMV1R_V $v15 ; CHECK-NEXT: $v9 = VMV1R_V $v16 -; CHECK-NEXT: $v4 = VMV1R_V $v10 -; CHECK-NEXT: $v5 = VMV1R_V $v11 -; CHECK-NEXT: $v6 = VMV1R_V $v12 -; CHECK-NEXT: $v7 = VMV1R_V $v13 -; CHECK-NEXT: $v8 = VMV1R_V $v14 -; CHECK-NEXT: $v9 = VMV1R_V $v15 +; CHECK-NEXT: $v4m2 = VMV2R_V $v10m2 +; CHECK-NEXT: $v6m2 = VMV2R_V $v12m2 +; CHECK-NEXT: $v8m2 = VMV2R_V $v14m2 ; CHECK-NEXT: $v10 = VMV1R_V $v16 -; CHECK-NEXT: $v22 = VMV1R_V $v16 -; CHECK-NEXT: $v21 = VMV1R_V $v15 -; CHECK-NEXT: $v20 = VMV1R_V $v14 -; CHECK-NEXT: $v19 = VMV1R_V $v13 -; CHECK-NEXT: $v18 = VMV1R_V $v12 -; CHECK-NEXT: $v17 = VMV1R_V $v11 +; CHECK-NEXT: $v22m2 = VMV2R_V $v16m2 +; CHECK-NEXT: $v20m2 = VMV2R_V $v14m2 +; CHECK-NEXT: $v18m2 = VMV2R_V $v12m2 ; CHECK-NEXT: $v16 = VMV1R_V $v10 lukel97 wrote: Do we have a test for a copy like: ``` $v16_v17_v18_v19_v20_v21_v22 = COPY $v15_v16_v17_v18_v19_v20_v21 ``` Because I think this will need to be all VMV1R_Vs. Does it already do this? https://github.com/llvm/llvm-project/pull/84455 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [RISCV] Use larger copies when register tuples are aligned (PR #84455)
@@ -302,102 +302,98 @@ void RISCVInstrInfo::copyPhysRegVector(MachineBasicBlock &MBB, RISCVII::VLMUL LMul, unsigned NF) const { const TargetRegisterInfo *TRI = STI.getRegisterInfo(); - unsigned Opc; - unsigned SubRegIdx; - unsigned VVOpc, VIOpc; - switch (LMul) { - default: -llvm_unreachable("Impossible LMUL for vector register copy."); - case RISCVII::LMUL_1: -Opc = RISCV::VMV1R_V; -SubRegIdx = RISCV::sub_vrm1_0; -VVOpc = RISCV::PseudoVMV_V_V_M1; -VIOpc = RISCV::PseudoVMV_V_I_M1; -break; - case RISCVII::LMUL_2: -Opc = RISCV::VMV2R_V; -SubRegIdx = RISCV::sub_vrm2_0; -VVOpc = RISCV::PseudoVMV_V_V_M2; -VIOpc = RISCV::PseudoVMV_V_I_M2; -break; - case RISCVII::LMUL_4: -Opc = RISCV::VMV4R_V; -SubRegIdx = RISCV::sub_vrm4_0; -VVOpc = RISCV::PseudoVMV_V_V_M4; -VIOpc = RISCV::PseudoVMV_V_I_M4; -break; - case RISCVII::LMUL_8: -assert(NF == 1); -Opc = RISCV::VMV8R_V; -SubRegIdx = RISCV::sub_vrm1_0; // There is no sub_vrm8_0. -VVOpc = RISCV::PseudoVMV_V_V_M8; -VIOpc = RISCV::PseudoVMV_V_I_M8; -break; - } - - bool UseVMV_V_V = false; - bool UseVMV_V_I = false; - MachineBasicBlock::const_iterator DefMBBI; - if (isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) { -UseVMV_V_V = true; -Opc = VVOpc; - -if (DefMBBI->getOpcode() == VIOpc) { - UseVMV_V_I = true; - Opc = VIOpc; -} - } - - if (NF == 1) { -auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), DstReg); -if (UseVMV_V_V) - MIB.addReg(DstReg, RegState::Undef); -if (UseVMV_V_I) - MIB = MIB.add(DefMBBI->getOperand(2)); -else - MIB = MIB.addReg(SrcReg, getKillRegState(KillSrc)); -if (UseVMV_V_V) { - const MCInstrDesc &Desc = DefMBBI->getDesc(); - MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc))); // AVL - MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW - MIB.addImm(0);// tu, mu - MIB.addReg(RISCV::VL, RegState::Implicit); - MIB.addReg(RISCV::VTYPE, RegState::Implicit); -} -return; - } - - int I = 0, End = NF, Incr = 1; unsigned SrcEncoding = TRI->getEncodingValue(SrcReg); unsigned DstEncoding = 
TRI->getEncodingValue(DstReg); unsigned LMulVal; bool Fractional; std::tie(LMulVal, Fractional) = RISCVVType::decodeVLMUL(LMul); assert(!Fractional && "It is impossible be fractional lmul here."); - if (forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NF * LMulVal)) { -I = NF - 1; -End = -1; -Incr = -1; - } + unsigned NumRegs = NF * LMulVal; + bool ReversedCopy = + forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NumRegs); + if (ReversedCopy) { +// If there exists overlapping, we should copy the registers reversely. +SrcEncoding += NumRegs - LMulVal; +DstEncoding += NumRegs - LMulVal; + } + + unsigned I = 0; + auto GetCopyInfo = [&](uint16_t SrcEncoding, uint16_t DstEncoding) + -> std::tuple { +// If source register encoding and destination register encoding are aligned +// to 8, we can do a LMUL8 copying. +if (SrcEncoding % 8 == 0 && DstEncoding % 8 == 0 && I + 8 <= NumRegs) + return {RISCVII::LMUL_8, RISCV::VRM8RegClass, RISCV::VMV8R_V, + RISCV::PseudoVMV_V_V_M8, RISCV::PseudoVMV_V_I_M8}; +// If source register encoding and destination register encoding are aligned +// to 4, we can do a LMUL4 copying. +if (SrcEncoding % 4 == 0 && DstEncoding % 4 == 0 && I + 4 <= NumRegs) + return {RISCVII::LMUL_4, RISCV::VRM4RegClass, RISCV::VMV4R_V, + RISCV::PseudoVMV_V_V_M4, RISCV::PseudoVMV_V_I_M4}; +// If source register encoding and destination register encoding are aligned +// to 2, we can do a LMUL2 copying. +if (SrcEncoding % 2 == 0 && DstEncoding % 2 == 0 && I + 2 <= NumRegs) + return {RISCVII::LMUL_2, RISCV::VRM2RegClass, RISCV::VMV2R_V, + RISCV::PseudoVMV_V_V_M2, RISCV::PseudoVMV_V_I_M2}; +// Or we should do LMUL1 copying. 
+return {RISCVII::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V, +RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1}; + }; + auto FindRegWithEncoding = [&TRI](const TargetRegisterClass &RegClass, +uint16_t Encoding) { +ArrayRef Regs = RegClass.getRegisters(); +const auto *FoundReg = llvm::find_if(Regs, [&](MCPhysReg Reg) { + return TRI->getEncodingValue(Reg) == Encoding; +}); +// We should be always able to find one valid register. +assert(FoundReg != Regs.end()); +return *FoundReg; + }; lukel97 wrote: Would it be easier to get the register via `TRI->getSubReg`? I think you should be able to compute the subreg index based off the RegClass and `I`. I don't think you'll need to compose any subreg indices like in `RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs` ht
[llvm-branch-commits] [llvm] [RISCV] Use larger copies when register tuples are aligned (PR #84455)
@@ -302,102 +302,98 @@ void RISCVInstrInfo::copyPhysRegVector(MachineBasicBlock &MBB, RISCVII::VLMUL LMul, unsigned NF) const { const TargetRegisterInfo *TRI = STI.getRegisterInfo(); - unsigned Opc; - unsigned SubRegIdx; - unsigned VVOpc, VIOpc; - switch (LMul) { - default: -llvm_unreachable("Impossible LMUL for vector register copy."); - case RISCVII::LMUL_1: -Opc = RISCV::VMV1R_V; -SubRegIdx = RISCV::sub_vrm1_0; -VVOpc = RISCV::PseudoVMV_V_V_M1; -VIOpc = RISCV::PseudoVMV_V_I_M1; -break; - case RISCVII::LMUL_2: -Opc = RISCV::VMV2R_V; -SubRegIdx = RISCV::sub_vrm2_0; -VVOpc = RISCV::PseudoVMV_V_V_M2; -VIOpc = RISCV::PseudoVMV_V_I_M2; -break; - case RISCVII::LMUL_4: -Opc = RISCV::VMV4R_V; -SubRegIdx = RISCV::sub_vrm4_0; -VVOpc = RISCV::PseudoVMV_V_V_M4; -VIOpc = RISCV::PseudoVMV_V_I_M4; -break; - case RISCVII::LMUL_8: -assert(NF == 1); -Opc = RISCV::VMV8R_V; -SubRegIdx = RISCV::sub_vrm1_0; // There is no sub_vrm8_0. -VVOpc = RISCV::PseudoVMV_V_V_M8; -VIOpc = RISCV::PseudoVMV_V_I_M8; -break; - } - - bool UseVMV_V_V = false; - bool UseVMV_V_I = false; - MachineBasicBlock::const_iterator DefMBBI; - if (isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) { -UseVMV_V_V = true; -Opc = VVOpc; - -if (DefMBBI->getOpcode() == VIOpc) { - UseVMV_V_I = true; - Opc = VIOpc; -} - } - - if (NF == 1) { -auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), DstReg); -if (UseVMV_V_V) - MIB.addReg(DstReg, RegState::Undef); -if (UseVMV_V_I) - MIB = MIB.add(DefMBBI->getOperand(2)); -else - MIB = MIB.addReg(SrcReg, getKillRegState(KillSrc)); -if (UseVMV_V_V) { - const MCInstrDesc &Desc = DefMBBI->getDesc(); - MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc))); // AVL - MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW - MIB.addImm(0);// tu, mu - MIB.addReg(RISCV::VL, RegState::Implicit); - MIB.addReg(RISCV::VTYPE, RegState::Implicit); -} -return; - } - - int I = 0, End = NF, Incr = 1; unsigned SrcEncoding = TRI->getEncodingValue(SrcReg); unsigned DstEncoding = 
TRI->getEncodingValue(DstReg); unsigned LMulVal; bool Fractional; std::tie(LMulVal, Fractional) = RISCVVType::decodeVLMUL(LMul); assert(!Fractional && "It is impossible be fractional lmul here."); - if (forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NF * LMulVal)) { -I = NF - 1; -End = -1; -Incr = -1; - } + unsigned NumRegs = NF * LMulVal; + bool ReversedCopy = + forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NumRegs); + if (ReversedCopy) { +// If there exists overlapping, we should copy the registers reversely. lukel97 wrote: Nit, maybe clarify this happens when copying tuples? ```suggestion // If the src and dest overlap when copying a tuple, we need to copy the registers in reverse. ``` https://github.com/llvm/llvm-project/pull/84455 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [runtimes] Allow building against an installed LLVM tree (PR #86209)
@@ -218,6 +218,22 @@ foreach(entry ${runtimes}) endforeach() if(LLVM_INCLUDE_TESTS) + # Add lit if needed before adding any runtimes since their CMake tests + # configuration might depend on lit being present. + if (NOT HAVE_LLVM_LIT) +# If built by manually invoking cmake on this directory, we don't have +# llvm-lit. If invoked via llvm/runtimes, the toplevel llvm cmake +# invocation already generated the llvm-lit script. +set(LLVM_LIT_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/bin) +add_subdirectory(${LLVM_MAIN_SRC_DIR}/utils/llvm-lit + ${CMAKE_CURRENT_BINARY_DIR}/llvm-lit) +# Ensure that the testsuites use the local lit rather than +# LLVM_INSTALL_DIR/bin/llvm-lit (which may not exist if LLVM_BINARY_DIR +# points at an installed LLVM tree rather than a build tree. +get_llvm_lit_path(_base_dir _file_name) +set(LLVM_EXTERNAL_LIT "${_base_dir}/${_file_name}" CACHE STRING "Command used to spawn lit" FORCE) ldionne wrote: I don't understand. We *must* have `lit` available since we are in the `runtimes/CMakeLists.txt`? I actually don't understand why the `if (NOT HAVE_LLVM_LIT)` branch is ever taken. Can you shed some light on this? https://github.com/llvm/llvm-project/pull/86209 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][OpenMP] Support tasks' implicit firstprivate DSA (PR #85989)
NimishMishra wrote: Also, the PR shows "[luporl] wants to merge 1 commit into [**llvm:users/luporl/refactor-default-test**]from [luporl:luporl-omp-implicit]" Is this merge not in `main`? https://github.com/llvm/llvm-project/pull/85989 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][OpenMP] Support tasks' implicit firstprivate DSA (PR #85989)
@@ -213,11 +213,10 @@ subroutine nested_default_clause_tests !CHECK: omp.terminator !CHECK: } !CHECK: omp.parallel { -!CHECK: %[[PRIVATE_INNER_Z:.*]] = fir.alloca i32 {bindc_name = "z", pinned, uniq_name = "_QFnested_default_clause_testsEz"} !CHECK: %[[PRIVATE_INNER_W:.*]] = fir.alloca i32 {bindc_name = "w", pinned, uniq_name = "_QFnested_default_clause_testsEw"} !CHECK: %[[PRIVATE_INNER_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFnested_default_clause_testsEx"} !CHECK: %[[temp_1:.*]] = fir.load %[[PRIVATE_INNER_X]] : !fir.ref -!CHECK: %[[temp_2:.*]] = fir.load %[[PRIVATE_INNER_Z]] : !fir.ref +!CHECK: %[[temp_2:.*]] = fir.load %[[PRIVATE_Z]] : !fir.ref NimishMishra wrote: Thanks for this. Indeed `shared(z)` needs to have `PRIVATE_Z` and not `PRIVATE_INNER_Z`. https://github.com/llvm/llvm-project/pull/85989 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][OpenMP] Support tasks' implicit firstprivate DSA (PR #85989)
https://github.com/NimishMishra approved this pull request. Thanks a lot for these changes. I went through the changes; the handling of symbols in semantics seems particularly fine. LGTM! The windows builtbot is failing, though Linux is passing. https://github.com/llvm/llvm-project/pull/85989 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][OpenMP] Support tasks' implicit firstprivate DSA (PR #85989)
https://github.com/NimishMishra edited https://github.com/llvm/llvm-project/pull/85989 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [CIR][NFC] Add scaffolding for the CIR dialect and CIROps.td (PR #86080)
@@ -0,0 +1,7 @@ +set(MLIR_MAIN_SRC_DIR ${LLVM_MAIN_SRC_DIR}/../mlir/include ) # --src-root +set(MLIR_INCLUDE_DIR ${LLVM_MAIN_SRC_DIR}/../mlir/include ) # --includedir +set(MLIR_TABLEGEN_OUTPUT_DIR ${CMAKE_BINARY_DIR}/tools/mlir/include) +include_directories(SYSTEM ${MLIR_INCLUDE_DIR}) +include_directories(SYSTEM ${MLIR_TABLEGEN_OUTPUT_DIR}) petrhosek wrote: Why do these need to be system include directories? Rather than adding these to global list of include paths, could we create an interface library and use it as an explicit dependency only for targets that need it? https://github.com/llvm/llvm-project/pull/86080 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [CIR][NFC] Add scaffolding for the CIR dialect and CIROps.td (PR #86080)
@@ -0,0 +1,7 @@ +set(MLIR_MAIN_SRC_DIR ${LLVM_MAIN_SRC_DIR}/../mlir/include ) # --src-root petrhosek wrote: This variable seems unused and has the same value as `MLIR_INCLUDE_DIR` below, what's the use case for this variable? https://github.com/llvm/llvm-project/pull/86080 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] release/18.x: [mlir][NFC] Apply rule of five to *Pass classes (#80998) (PR #83971)
andrey-golubev wrote: Aha, so, apparently, I cannot even close a PR (lack of commit write access) :/ Could someone close this one as we don't plan on merging it? Not urgent anyway. https://github.com/llvm/llvm-project/pull/83971 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits