https://github.com/yuxuanchen1997 updated https://github.com/llvm/llvm-project/pull/99283
>From 0c712a2fbc5b44e892b37085dbace8ba974c1238 Mon Sep 17 00:00:00 2001 From: Yuxuan Chen <yuxuanchen1...@outlook.com> Date: Tue, 4 Jun 2024 23:22:00 -0700 Subject: [PATCH] [LLVM][Coroutines] Create `.noalloc` variant of switch ABI coroutine ramp functions during CoroSplit --- llvm/docs/Coroutines.rst | 18 +++ llvm/lib/Transforms/Coroutines/CoroInternal.h | 7 + llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 150 +++++++++++++++--- llvm/lib/Transforms/Coroutines/Coroutines.cpp | 27 ++++ .../Transforms/Coroutines/coro-split-00.ll | 15 ++ 5 files changed, 191 insertions(+), 26 deletions(-) diff --git a/llvm/docs/Coroutines.rst b/llvm/docs/Coroutines.rst index 36092325e536fb..5679aefcb421d8 100644 --- a/llvm/docs/Coroutines.rst +++ b/llvm/docs/Coroutines.rst @@ -2022,6 +2022,12 @@ The pass CoroSplit builds coroutine frame and outlines resume and destroy parts into separate functions. This pass also lowers `coro.await.suspend.void`_, `coro.await.suspend.bool`_ and `coro.await.suspend.handle`_ intrinsics. +CoroAnnotationElide +------------------- +This pass finds all usages of coroutines that are "must elide" and replaces +`coro.begin` intrinsic with an address of a coroutine frame placed on its caller +and replaces `coro.alloc` and `coro.free` intrinsics with `false` and `null` +respectively to remove the allocation and deallocation code. CoroElide --------- @@ -2049,6 +2055,18 @@ the coroutine must reach the final suspend point when it get destroyed. This attribute only works for switched-resume coroutines now. +coro_elide_safe +--------------- + +When a Call or Invoke instruction to switch ABI coroutine `f` is marked with +`coro_elide_safe`, CoroSplitPass generates a `f.noalloc` ramp function. +`f.noalloc` has one more argument than its original ramp function `f`, which is +the pointer to the allocated frame. `f.noalloc` also suppresses any allocations +or deallocations that may be guarded by `@llvm.coro.alloc` and `@llvm.coro.free`. 
+ +CoroAnnotationElidePass performs the heap elision when possible. Note that for +recursive or mutually recursive functions this elision is usually not possible. + Metadata ======== diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h index d535ad7f85d74a..be86f96525b677 100644 --- a/llvm/lib/Transforms/Coroutines/CoroInternal.h +++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h @@ -26,6 +26,13 @@ bool declaresIntrinsics(const Module &M, const std::initializer_list<StringRef>); void replaceCoroFree(CoroIdInst *CoroId, bool Elide); +/// Replaces all @llvm.coro.alloc intrinsic calls associated with a given +/// @llvm.coro.id call instruction with the boolean value false. +void suppressCoroAllocs(CoroIdInst *CoroId); +/// Replaces CoroAllocs with boolean value false. +void suppressCoroAllocs(LLVMContext &Context, + ArrayRef<CoroAllocInst *> CoroAllocs); + /// Attempts to rewrite the location operand of debug intrinsics in terms of /// the coroutine frame pointer, folding pointer offsets into the DIExpression /// of the intrinsic. diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index 6bf3c75b95113e..494c4d632de95f 100644 --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -25,6 +25,7 @@ #include "llvm/ADT/PriorityWorklist.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/CFG.h" @@ -1177,6 +1178,14 @@ static void updateAsyncFuncPointerContextSize(coro::Shape &Shape) { Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct); } +static TypeSize getFrameSizeForShape(coro::Shape &Shape) { + // In the same function all coro.sizes should have the same result type. 
+ auto *SizeIntrin = Shape.CoroSizes.back(); + Module *M = SizeIntrin->getModule(); + const DataLayout &DL = M->getDataLayout(); + return DL.getTypeAllocSize(Shape.FrameTy); +} + static void replaceFrameSizeAndAlignment(coro::Shape &Shape) { if (Shape.ABI == coro::ABI::Async) updateAsyncFuncPointerContextSize(Shape); @@ -1192,10 +1201,8 @@ static void replaceFrameSizeAndAlignment(coro::Shape &Shape) { // In the same function all coro.sizes should have the same result type. auto *SizeIntrin = Shape.CoroSizes.back(); - Module *M = SizeIntrin->getModule(); - const DataLayout &DL = M->getDataLayout(); - auto Size = DL.getTypeAllocSize(Shape.FrameTy); - auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size); + auto *SizeConstant = + ConstantInt::get(SizeIntrin->getType(), getFrameSizeForShape(Shape)); for (CoroSizeInst *CS : Shape.CoroSizes) { CS->replaceAllUsesWith(SizeConstant); @@ -1452,6 +1459,75 @@ struct SwitchCoroutineSplitter { setCoroInfo(F, Shape, Clones); } + // Create a variant of ramp function that does not perform heap allocation + // for a switch ABI coroutine. + // + // The newly split `.noalloc` ramp function has the following differences: + // - Has one additional frame pointer parameter in lieu of dynamic + // allocation. + // - Suppressed allocations by replacing coro.alloc and coro.free. 
+ static Function *createNoAllocVariant(Function &F, coro::Shape &Shape, + SmallVectorImpl<Function *> &Clones) { + assert(Shape.ABI == coro::ABI::Switch); + auto *OrigFnTy = F.getFunctionType(); + auto OldParams = OrigFnTy->params(); + + SmallVector<Type *> NewParams; + NewParams.reserve(OldParams.size() + 1); + NewParams.append(OldParams.begin(), OldParams.end()); + NewParams.push_back(PointerType::getUnqual(Shape.FrameTy)); + + auto *NewFnTy = FunctionType::get(OrigFnTy->getReturnType(), NewParams, + OrigFnTy->isVarArg()); + Function *NoAllocF = + Function::Create(NewFnTy, F.getLinkage(), F.getName() + ".noalloc"); + + ValueToValueMapTy VMap; + unsigned int Idx = 0; + for (const auto &I : F.args()) { + VMap[&I] = NoAllocF->getArg(Idx++); + } + // We just appended the frame pointer as the last argument of the new + // function. + auto FrameIdx = NoAllocF->arg_size() - 1; + SmallVector<ReturnInst *, 4> Returns; + CloneFunctionInto(NoAllocF, &F, VMap, + CloneFunctionChangeType::LocalChangesOnly, Returns); + + if (Shape.CoroBegin) { + auto *NewCoroBegin = + cast_if_present<CoroBeginInst>(VMap[Shape.CoroBegin]); + auto *NewCoroId = cast<CoroIdInst>(NewCoroBegin->getId()); + coro::replaceCoroFree(NewCoroId, /*Elide=*/true); + coro::suppressCoroAllocs(NewCoroId); + NewCoroBegin->replaceAllUsesWith(NoAllocF->getArg(FrameIdx)); + NewCoroBegin->eraseFromParent(); + } + + Module *M = F.getParent(); + M->getFunctionList().insert(M->end(), NoAllocF); + + removeUnreachableBlocks(*NoAllocF); + auto NewAttrs = NoAllocF->getAttributes(); + // When we elide allocation, we read these attributes to determine the + // frame size and alignment. + addFramePointerAttrs(NewAttrs, NoAllocF->getContext(), FrameIdx, + Shape.FrameSize, Shape.FrameAlign, + /*NoAlias=*/false); + + NoAllocF->setAttributes(NewAttrs); + + Clones.push_back(NoAllocF); + // Reset the original function's coro info, make the new noalloc variant + // connected to the original ramp function. 
+ setCoroInfo(F, Shape, Clones); + // After copying, set the linkage to internal linkage. Original function + // may have different linkage, but optimization dependent on this function + // generally relies on LTO. + NoAllocF->setLinkage(llvm::GlobalValue::InternalLinkage); + return NoAllocF; + } + private: // Create a resume clone by cloning the body of the original function, setting // new entry block and replacing coro.suspend an appropriate value to force @@ -1910,6 +1986,33 @@ class PrettyStackTraceFunction : public PrettyStackTraceEntry { }; } // namespace +/// Remove calls to llvm.coro.end in the original function. +static void removeCoroEndsFromRampFunction(const coro::Shape &Shape) { + if (Shape.ABI != coro::ABI::Switch) { + for (auto *End : Shape.CoroEnds) { + replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, nullptr); + } + } else { + for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) { + auto &Context = End->getContext(); + End->replaceAllUsesWith(ConstantInt::getFalse(Context)); + End->eraseFromParent(); + } + } +} + +static bool hasSafeElideCaller(Function &F) { + for (auto *U : F.users()) { + if (auto *CB = dyn_cast<CallBase>(U)) { + auto *Caller = CB->getFunction(); + if (Caller && Caller->isPresplitCoroutine() && + CB->hasFnAttr(llvm::Attribute::CoroElideSafe)) + return true; + } + } + return false; +} + static coro::Shape splitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones, TargetTransformInfo &TTI, bool OptimizeFrame, @@ -1929,10 +2032,15 @@ splitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones, simplifySuspendPoints(Shape); buildCoroutineFrame(F, Shape, TTI, MaterializableCallback); replaceFrameSizeAndAlignment(Shape); + bool isNoSuspendCoroutine = Shape.CoroSuspends.empty(); + + bool shouldCreateNoAllocVariant = !isNoSuspendCoroutine && + Shape.ABI == coro::ABI::Switch && + hasSafeElideCaller(F); // If there are no suspend points, no split required, just remove // the allocation and deallocation blocks, they 
are not needed. - if (Shape.CoroSuspends.empty()) { + if (isNoSuspendCoroutine) { handleNoSuspendCoroutine(Shape); } else { switch (Shape.ABI) { @@ -1962,22 +2070,13 @@ splitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones, coro::salvageDebugInfo(ArgToAllocaMap, *DDI, false /*UseEntryValue*/); for (DbgVariableRecord *DVR : DbgVariableRecords) coro::salvageDebugInfo(ArgToAllocaMap, *DVR, false /*UseEntryValue*/); - return Shape; -} -/// Remove calls to llvm.coro.end in the original function. -static void removeCoroEndsFromRampFunction(const coro::Shape &Shape) { - if (Shape.ABI != coro::ABI::Switch) { - for (auto *End : Shape.CoroEnds) { - replaceCoroEnd(End, Shape, Shape.FramePtr, /*in resume*/ false, nullptr); - } - } else { - for (llvm::AnyCoroEndInst *End : Shape.CoroEnds) { - auto &Context = End->getContext(); - End->replaceAllUsesWith(ConstantInt::getFalse(Context)); - End->eraseFromParent(); - } - } + removeCoroEndsFromRampFunction(Shape); + + if (shouldCreateNoAllocVariant) + SwitchCoroutineSplitter::createNoAllocVariant(F, Shape, Clones); + + return Shape; } static void updateCallGraphAfterCoroutineSplit( @@ -2108,13 +2207,12 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C, F.setSplittedCoroutine(); SmallVector<Function *, 4> Clones; - auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); - const coro::Shape Shape = + coro::Shape Shape = splitCoroutine(F, Clones, FAM.getResult<TargetIRAnalysis>(F), OptimizeFrame, MaterializableCallback); - removeCoroEndsFromRampFunction(Shape); updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM); + auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); ORE.emit([&]() { return OptimizationRemark(DEBUG_TYPE, "CoroSplit", &F) << "Split '" << ore::NV("function", F.getName()) @@ -2130,9 +2228,9 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C, } } - for (auto *PrepareFn : PrepareFns) { - replaceAllPrepares(PrepareFn, CG, C); - } + for (auto 
*PrepareFn : PrepareFns) { + replaceAllPrepares(PrepareFn, CG, C); + } return PreservedAnalyses::none(); } diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp index 1a92bc1636257b..be257339e0ac49 100644 --- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp +++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp @@ -145,6 +145,33 @@ void coro::replaceCoroFree(CoroIdInst *CoroId, bool Elide) { } } +void coro::suppressCoroAllocs(CoroIdInst *CoroId) { + SmallVector<CoroAllocInst *, 4> CoroAllocs; + for (User *U : CoroId->users()) + if (auto *CA = dyn_cast<CoroAllocInst>(U)) + CoroAllocs.push_back(CA); + + if (CoroAllocs.empty()) + return; + + coro::suppressCoroAllocs(CoroId->getContext(), CoroAllocs); +} + +// Replacing llvm.coro.alloc with false will suppress dynamic +// allocation as it is expected for the frontend to generate the code that +// looks like: +// id = coro.id(...) +// mem = coro.alloc(id) ? malloc(coro.size()) : 0; +// coro.begin(id, mem) +void coro::suppressCoroAllocs(LLVMContext &Context, + ArrayRef<CoroAllocInst *> CoroAllocs) { + auto *False = ConstantInt::getFalse(Context); + for (auto *CA : CoroAllocs) { + CA->replaceAllUsesWith(False); + CA->eraseFromParent(); + } +} + static void clear(coro::Shape &Shape) { Shape.CoroBegin = nullptr; Shape.CoroEnds.clear(); diff --git a/llvm/test/Transforms/Coroutines/coro-split-00.ll b/llvm/test/Transforms/Coroutines/coro-split-00.ll index b35bd720b86f96..9909627e60597d 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-00.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-00.ll @@ -32,6 +32,13 @@ suspend: ret ptr %hdl } +; Make a coro_elide_safe call to f so that CoroSplit generates the .noalloc variant +define void @caller() presplitcoroutine { +entry: + %ptr = call ptr @f() #1 + ret void +} + ; CHECK-LABEL: @f() !func_sanitize !0 { ; CHECK: call ptr @malloc ; CHECK: @llvm.coro.begin(token %id, ptr %phi) @@ -63,6 +70,13 @@ suspend: ; CHECK-NOT: call void 
@free( ; CHECK: ret void +; CHECK-LABEL: @f.noalloc(ptr noundef nonnull align 8 dereferenceable(24) %{{.*}}) +; CHECK-NOT: call ptr @malloc +; CHECK: call void @print(i32 0) +; CHECK-NOT: call void @print(i32 1) +; CHECK-NOT: call void @free( +; CHECK: ret ptr %{{.*}} + declare ptr @llvm.coro.free(token, ptr) declare i32 @llvm.coro.size.i32() declare i8 @llvm.coro.suspend(token, i1) @@ -79,3 +93,4 @@ declare void @print(i32) declare void @free(ptr) willreturn allockind("free") "alloc-family"="malloc" !0 = !{i32 846595819, ptr null} +attributes #1 = { coro_elide_safe } _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits