ychen updated this revision to Diff 338430.
ychen added a comment.
fix comment
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D100739/new/
https://reviews.llvm.org/D100739
Files:
clang/docs/LanguageExtensions.rst
clang/include/clang/Basic/Builtins.def
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/Sema/SemaCoroutine.cpp
llvm/docs/Coroutines.rst
llvm/include/llvm/IR/Intrinsics.td
llvm/lib/Transforms/Coroutines/CoroFrame.cpp
llvm/lib/Transforms/Coroutines/CoroInstr.h
llvm/lib/Transforms/Coroutines/CoroInternal.h
llvm/lib/Transforms/Coroutines/CoroSplit.cpp
llvm/lib/Transforms/Coroutines/Coroutines.cpp
llvm/test/Transforms/Coroutines/ArgAddr.ll
llvm/test/Transforms/Coroutines/coro-alloc-with-param-O0.ll
llvm/test/Transforms/Coroutines/coro-alloca-01.ll
Index: llvm/test/Transforms/Coroutines/coro-alloca-01.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-alloca-01.ll
+++ llvm/test/Transforms/Coroutines/coro-alloca-01.ll
@@ -8,7 +8,7 @@
%x = alloca i64
%y = alloca i64
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
- %size = call i32 @llvm.coro.size.i32()
+ %size = call i32 @llvm.coro.size.i32(i1 false)
%alloc = call i8* @malloc(i32 %size)
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
br i1 %n, label %flag_true, label %flag_false
Index: llvm/test/Transforms/Coroutines/coro-alloc-with-param-O0.ll
===================================================================
--- llvm/test/Transforms/Coroutines/coro-alloc-with-param-O0.ll
+++ llvm/test/Transforms/Coroutines/coro-alloc-with-param-O0.ll
@@ -9,8 +9,8 @@
%this.addr = alloca i64
store i64 %this_arg, i64* %this.addr
%this = load i64, i64* %this.addr
- %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
- %size = call i32 @llvm.coro.size.i32()
+ %id = call token @llvm.coro.id(i32 16, i8* null, i8* null, i8* null)
+ %size = call i32 @llvm.coro.size.i32(i1 true)
%alloc = call i8* @myAlloc(i64 %this, i32 %size)
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
%0 = call i8 @llvm.coro.suspend(token none, i1 false)
@@ -45,7 +45,7 @@
; CHECK: ret void
declare i8* @llvm.coro.free(token, i8*)
-declare i32 @llvm.coro.size.i32()
+declare i32 @llvm.coro.size.i32(i1)
declare i8 @llvm.coro.suspend(token, i1)
declare void @llvm.coro.resume(i8*)
declare void @llvm.coro.destroy(i8*)
Index: llvm/test/Transforms/Coroutines/ArgAddr.ll
===================================================================
--- llvm/test/Transforms/Coroutines/ArgAddr.ll
+++ llvm/test/Transforms/Coroutines/ArgAddr.ll
@@ -21,10 +21,10 @@
; CHECK-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4
;
entry:
- %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null);
+ %id = call token @llvm.coro.id(i32 16, i8* null, i8* null, i8* null);
%n.addr = alloca i32
store i32 %n, i32* %n.addr ; this needs to go after coro.begin
- %0 = tail call i32 @llvm.coro.size.i32()
+ %0 = tail call i32 @llvm.coro.size.i32(i1 true)
%call = tail call i8* @malloc(i32 %0)
%1 = tail call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %call)
%2 = bitcast i32* %n.addr to i8*
@@ -69,7 +69,7 @@
declare void @ctor(i8* nocapture readonly)
declare token @llvm.coro.id(i32, i8*, i8*, i8*)
-declare i32 @llvm.coro.size.i32()
+declare i32 @llvm.coro.size.i32(i1)
declare i8* @llvm.coro.begin(token, i8*)
declare i8 @llvm.coro.suspend(token, i1)
declare i8* @llvm.coro.free(token, i8*)
Index: llvm/lib/Transforms/Coroutines/Coroutines.cpp
===================================================================
--- llvm/lib/Transforms/Coroutines/Coroutines.cpp
+++ llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -234,6 +234,7 @@
Shape.CoroBegin = nullptr;
Shape.CoroEnds.clear();
Shape.CoroSizes.clear();
+ Shape.CoroAligns.clear();
Shape.CoroSuspends.clear();
Shape.FrameTy = nullptr;
@@ -267,6 +268,10 @@
continue;
case Intrinsic::coro_size:
CoroSizes.push_back(cast<CoroSizeInst>(II));
+ HandleOverAlign = HandleOverAlign || CoroSizes.back()->isAlloc();
+ break;
+ case Intrinsic::coro_align:
+ CoroAligns.push_back(cast<CoroAlignInst>(II));
break;
case Intrinsic::coro_frame:
CoroFrames.push_back(cast<CoroFrameInst>(II));
Index: llvm/lib/Transforms/Coroutines/CoroSplit.cpp
===================================================================
--- llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -69,6 +69,7 @@
#include <cstdint>
#include <initializer_list>
#include <iterator>
+#include <stdint.h>
using namespace llvm;
@@ -997,23 +998,39 @@
Shape.AsyncLowering.AsyncFuncPointer->setInitializer(NewFuncPtrStruct);
}
-static void replaceFrameSize(coro::Shape &Shape) {
+static void replaceFrameSizeAndAlign(coro::Shape &Shape) {
if (Shape.ABI == coro::ABI::Async)
updateAsyncFuncPointerContextSize(Shape);
- if (Shape.CoroSizes.empty())
- return;
+ if (!Shape.CoroSizes.empty()) {
+ // In the same function all coro.sizes should have the same result type.
+ auto *SizeIntrin = Shape.CoroSizes.back();
+ Module *M = SizeIntrin->getModule();
+ const DataLayout &DL = M->getDataLayout();
+ auto Size = DL.getTypeAllocSize(Shape.FrameTy);
- // In the same function all coro.sizes should have the same result type.
- auto *SizeIntrin = Shape.CoroSizes.back();
- Module *M = SizeIntrin->getModule();
- const DataLayout &DL = M->getDataLayout();
- auto Size = DL.getTypeAllocSize(Shape.FrameTy);
- auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size);
+ uint64_t FrameAlign = Shape.FrameAlign.value();
+ uint64_t NewAlign = Shape.getSwitchCoroId()->getAlignment();
+ uint64_t Extra = FrameAlign > NewAlign ? FrameAlign - NewAlign : 0;
- for (CoroSizeInst *CS : Shape.CoroSizes) {
- CS->replaceAllUsesWith(SizeConstant);
- CS->eraseFromParent();
+ for (CoroSizeInst *CS : Shape.CoroSizes) {
+ // Over allocate if needed.
+ uint64_t FrameSize = Size + (Shape.HandleOverAlign ? Extra : 0);
+ auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), FrameSize);
+ CS->replaceAllUsesWith(SizeConstant);
+ CS->eraseFromParent();
+ }
+ }
+
+ if (!Shape.CoroAligns.empty()) {
+ auto *Intrin = Shape.CoroAligns.back();
+ auto *AlignConstant =
+ ConstantInt::get(Intrin->getType(), Shape.FrameAlign.value());
+
+ for (CoroAlignInst *CS : Shape.CoroAligns) {
+ CS->replaceAllUsesWith(AlignConstant);
+ CS->eraseFromParent();
+ }
}
}
@@ -1748,7 +1765,7 @@
simplifySuspendPoints(Shape);
buildCoroutineFrame(F, Shape);
- replaceFrameSize(Shape);
+ replaceFrameSizeAndAlign(Shape);
// If there are no suspend points, no split required, just remove
// the allocation and deallocation blocks, they are not needed.
Index: llvm/lib/Transforms/Coroutines/CoroInternal.h
===================================================================
--- llvm/lib/Transforms/Coroutines/CoroInternal.h
+++ llvm/lib/Transforms/Coroutines/CoroInternal.h
@@ -99,6 +99,7 @@
CoroBeginInst *CoroBegin;
SmallVector<AnyCoroEndInst *, 4> CoroEnds;
SmallVector<CoroSizeInst *, 2> CoroSizes;
+ SmallVector<CoroAlignInst *, 2> CoroAligns;
SmallVector<AnyCoroSuspendInst *, 4> CoroSuspends;
SmallVector<CallInst*, 2> SwiftErrorOps;
@@ -124,8 +125,8 @@
uint64_t FrameSize;
Instruction *FramePtr;
BasicBlock *AllocaSpillBlock;
-
- bool ReuseFrameSlot;
+ bool HandleOverAlign = false;
+ bool ReuseFrameSlot = false;
struct SwitchLoweringStorage {
SwitchInst *ResumeSwitch;
@@ -269,7 +270,7 @@
void emitDealloc(IRBuilder<> &Builder, Value *Ptr, CallGraph *CG) const;
Shape() = default;
- explicit Shape(Function &F, bool ReuseFrameSlot = false)
+ explicit Shape(Function &F, bool ReuseFrameSlot)
: ReuseFrameSlot(ReuseFrameSlot) {
buildFrom(F);
}
Index: llvm/lib/Transforms/Coroutines/CoroInstr.h
===================================================================
--- llvm/lib/Transforms/Coroutines/CoroInstr.h
+++ llvm/lib/Transforms/Coroutines/CoroInstr.h
@@ -121,6 +121,10 @@
: cast<AllocaInst>(Arg->stripPointerCasts());
}
+ unsigned getAlignment() const {
+ return cast<ConstantInt>(getArgOperand(AlignArg))->getZExtValue();
+ }
+
void clearPromise() {
Value *Arg = getArgOperand(PromiseArg);
setArgOperand(PromiseArg,
@@ -402,9 +406,9 @@
/// This represents the llvm.coro.free instruction.
class LLVM_LIBRARY_VISIBILITY CoroFreeInst : public IntrinsicInst {
+public:
enum { IdArg, FrameArg };
-public:
Value *getFrame() const { return getArgOperand(FrameArg); }
// Methods to support type inquiry through isa, cast, and dyn_cast:
@@ -589,7 +593,12 @@
/// This represents the llvm.coro.size instruction.
class LLVM_LIBRARY_VISIBILITY CoroSizeInst : public IntrinsicInst {
+ enum { AllocArg };
+
public:
+ bool isAlloc() const {
+ return cast<Constant>(getArgOperand(0))->isOneValue();
+ }
// Methods to support type inquiry through isa, cast, and dyn_cast:
static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::coro_size;
@@ -599,6 +608,18 @@
}
};
+/// This represents the llvm.coro.align instruction.
+class LLVM_LIBRARY_VISIBILITY CoroAlignInst : public IntrinsicInst {
+public:
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::coro_align;
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
class LLVM_LIBRARY_VISIBILITY AnyCoroEndInst : public IntrinsicInst {
enum { FrameArg, UnwindArg };
Index: llvm/lib/Transforms/Coroutines/CoroFrame.cpp
===================================================================
--- llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -14,17 +14,21 @@
// the value into the coroutine frame.
//===----------------------------------------------------------------------===//
+#include "CoroInstr.h"
#include "CoroInternal.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Analysis/PtrUseVisitor.h"
#include "llvm/Analysis/StackLifetime.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
@@ -486,6 +490,8 @@
return StructAlign;
}
+ SmallVector<Field, 8> &getFields() { return Fields; }
+
FieldIDType getLayoutFieldIndex(FieldIDType Id) const {
assert(IsFinished && "not yet finished!");
return Fields[Id].LayoutFieldIndex;
@@ -710,6 +716,77 @@
IsFinished = true;
}
+// Adapted from CodeGenFunction::EmitBuiltinAlignTo.
+static Value *emitAlignUpTo(IRBuilder<> &Builder, Value *Src, uint64_t Align) {
+ const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout();
+
+ auto *SrcType = cast<PointerType>(Src->getType());
+ IntegerType *IntType = IntegerType::get(Builder.getContext(),
+ DL.getIndexTypeSizeInBits(SrcType));
+ Value *Alignment = ConstantInt::get(IntType, Align);
+ auto *One = ConstantInt::get(IntType, 1);
+ Value *Mask = Builder.CreateSub(Alignment, One, "mask");
+ Value *SrcAddr = Builder.CreatePtrToInt(Src, IntType, "intptr");
+
+ // When aligning up we have to first add the mask to ensure we go over the
+ // next alignment value and then align down to the next valid multiple.
+ // By adding the mask, we ensure that align_up on an already aligned
+ // value will not change the value.
+ Value *SrcForMask = Builder.CreateAdd(SrcAddr, Mask, "over_boundary");
+
+ // Invert the mask to only clear the lower bits.
+ Value *InvertedMask = Builder.CreateNot(Mask, "inverted_mask");
+ Value *Result = Builder.CreateAnd(SrcForMask, InvertedMask, "aligned_result");
+
+ Result->setName("aligned_intptr");
+ Value *Difference = Builder.CreateSub(Result, SrcAddr, "diff");
+ // The result must point to the same underlying allocation. This means we
+ // can use an inbounds GEP to enable better optimization.
+
+ PointerType *DestType = Builder.getInt8PtrTy();
+ if (unsigned AddrSpace = SrcType->getAddressSpace())
+ DestType = Type::getInt8PtrTy(Builder.getContext(), AddrSpace);
+
+ Value *Base = Src;
+ if (SrcType != DestType)
+ Base = Builder.CreateBitCast(Src, DestType);
+
+ // Out-of-bound case could not happen.
+ Result = Builder.CreateGEP(Base, Difference, "aligned_result");
+ Result = Builder.CreatePointerCast(Result, SrcType);
+
+ Type *IntPtrTy = Builder.getIntPtrTy(DL);
+ if (Alignment->getType() != IntPtrTy)
+ Alignment =
+ Builder.CreateIntCast(Alignment, IntPtrTy, false, "casted.align");
+ (void)Builder.CreateAlignmentAssumption(DL, Result, Alignment);
+ assert(Result->getType() == SrcType);
+ return Result;
+}
+
+static void replaceCoroFreeMemArg(CoroIdInst *CoroId,
+ AllocaInst *FramePtrAddr) {
+ SmallVector<CoroFreeInst *, 4> CoroFrees;
+ for (User *U : CoroId->users())
+ if (auto *CF = dyn_cast<CoroFreeInst>(U))
+ CoroFrees.push_back(CF);
+
+ if (CoroFrees.empty())
+ return;
+
+ for (CoroFreeInst *CF : CoroFrees) {
+ // Assume CallInst users of `coro.free` are dealloc functions.
+
+ // CF->setArgOperand(CoroFreeInst::FrameArg, FrameAddr);
+ for (Use &U : CF->uses())
+ if (CallBase *CB = dyn_cast<CallBase>(U.getUser())) {
+ LoadInst *FrameAddr = new LoadInst(FramePtrAddr->getAllocatedType(),
+ FramePtrAddr, "raw.frame.ptr", CB);
+ CB->replaceUsesOfWith(CF, FrameAddr);
+ }
+ }
+}
+
// Build a struct that will keep state for an active coroutine.
// struct f.frame {
// ResumeFnTy ResumeFnAddr;
@@ -779,6 +856,40 @@
FrameData.setFieldIndex(S.first, Id);
}
+ Align FrameAlign =
+ std::max_element(
+ B.getFields().begin(), B.getFields().end(),
+ [](auto &F1, auto &F2) { return F1.Alignment < F2.Alignment; })
+ ->Alignment;
+
+ // Check for over-alignment.
+ if (Shape.HandleOverAlign &&
+ FrameAlign.value() > Shape.getSwitchCoroId()->getAlignment()) {
+ BasicBlock &Entry = F.getEntryBlock();
+ IRBuilder<> Builder(&Entry, Entry.getFirstInsertionPt());
+
+ // Save to raw frame pointer to alloca
+ Value *Mem = Shape.CoroBegin->getMem();
+ AllocaInst *FramePtrAddr =
+ Builder.CreateAlloca(Mem->getType(), nullptr, "alloc.frame.ptr");
+ Builder.SetInsertPoint(Shape.CoroBegin);
+ Value *MockMem = Builder.CreatePointerCast(FramePtrAddr, Mem->getType());
+ Builder.CreateStore(MockMem, FramePtrAddr);
+
+ // Ajust frame pointer value.
+ Value *NewMem = emitAlignUpTo(Builder, MockMem, FrameAlign.value());
+ Mem->replaceAllUsesWith(NewMem);
+ MockMem->replaceAllUsesWith(Mem);
+ cast<Instruction>(MockMem)->eraseFromParent();
+
+ // Replace all corofree second arg with raw frame pointer loaded from
+ // alloca.
+ replaceCoroFreeMemArg(Shape.getSwitchCoroId(), FramePtrAddr);
+
+ // Add alloca to frame.
+ (void)B.addFieldForAlloca(FramePtrAddr);
+ }
+
B.finish(FrameTy);
FrameData.updateLayoutIndex(B);
Shape.FrameAlign = B.getStructAlign();
Index: llvm/include/llvm/IR/Intrinsics.td
===================================================================
--- llvm/include/llvm/IR/Intrinsics.td
+++ llvm/include/llvm/IR/Intrinsics.td
@@ -1236,7 +1236,8 @@
def int_coro_frame : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
def int_coro_noop : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
-def int_coro_size : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>;
+def int_coro_size : Intrinsic<[llvm_anyint_ty], [llvm_i1_ty], [IntrNoMem]>;
+def int_coro_align : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>;
def int_coro_save : Intrinsic<[llvm_token_ty], [llvm_ptr_ty], []>;
def int_coro_suspend : Intrinsic<[llvm_i8_ty], [llvm_token_ty, llvm_i1_ty], []>;
Index: llvm/docs/Coroutines.rst
===================================================================
--- llvm/docs/Coroutines.rst
+++ llvm/docs/Coroutines.rst
@@ -927,8 +927,8 @@
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
::
- declare i32 @llvm.coro.size.i32()
- declare i64 @llvm.coro.size.i64()
+ declare i32 @llvm.coro.size.i32(i1 <alloc>)
+ declare i64 @llvm.coro.size.i64(i1 <alloc>)
Overview:
"""""""""
@@ -940,7 +940,10 @@
Arguments:
""""""""""
-None
+The first argument is a boolean indicating that the retuned value is used by
+alloc/dealloc functions. If it is true, overallocate may be performed to handle
+over-alignment. All alloc/delloc functions should use ``llvm.coro.size`` with
+the same ``alloc`` value.
Semantics:
""""""""""
@@ -948,6 +951,32 @@
The `coro.size` intrinsic is lowered to a constant representing the size of
the coroutine frame.
+.. _coro.align:
+
+'llvm.coro.align' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+::
+
+ declare i32 @llvm.coro.align.i32()
+ declare i64 @llvm.coro.align.i64()
+
+Overview:
+"""""""""
+
+The '``llvm.coro.align``' intrinsic returns the alignment of the coroutine frame
+in bytes. This is only supported for switched-resume coroutines.
+
+Arguments:
+""""""""""
+
+None
+
+Semantics:
+""""""""""
+
+The `coro.align` intrinsic is lowered to a constant representing the alignment
+of the coroutine frame.
+
.. _coro.begin:
'llvm.coro.begin' Intrinsic
Index: clang/lib/Sema/SemaCoroutine.cpp
===================================================================
--- clang/lib/Sema/SemaCoroutine.cpp
+++ clang/lib/Sema/SemaCoroutine.cpp
@@ -1375,8 +1375,12 @@
Expr *FramePtr =
buildBuiltinCall(S, Loc, Builtin::BI__builtin_coro_frame, {});
+ Expr *IsAlloc = IntegerLiteral::Create(
+ S.Context, llvm::APInt(32, 1),
+ S.Context.getIntTypeForBitwidth(32, /*Signed=*/0), Loc);
+
Expr *FrameSize =
- buildBuiltinCall(S, Loc, Builtin::BI__builtin_coro_size, {});
+ buildBuiltinCall(S, Loc, Builtin::BI__builtin_coro_size, {IsAlloc});
// Make new call.
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -4430,11 +4430,12 @@
return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
case Builtin::BI__builtin_coro_size: {
+ Value *Arg0 = EmitScalarExpr(E->getArg(0));
auto & Context = getContext();
auto SizeTy = Context.getSizeType();
auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
Function *F = CGM.getIntrinsic(Intrinsic::coro_size, T);
- return RValue::get(Builder.CreateCall(F));
+ return RValue::get(Builder.CreateCall(F, {Arg0}));
}
case Builtin::BI__builtin_coro_id:
Index: clang/include/clang/Basic/Builtins.def
===================================================================
--- clang/include/clang/Basic/Builtins.def
+++ clang/include/clang/Basic/Builtins.def
@@ -1573,7 +1573,7 @@
BUILTIN(__builtin_coro_done, "bv*", "n")
BUILTIN(__builtin_coro_promise, "v*v*IiIb", "n")
-BUILTIN(__builtin_coro_size, "z", "n")
+BUILTIN(__builtin_coro_size, "zb", "n")
BUILTIN(__builtin_coro_frame, "v*", "n")
BUILTIN(__builtin_coro_noop, "v*", "n")
BUILTIN(__builtin_coro_free, "v*v*", "n")
Index: clang/docs/LanguageExtensions.rst
===================================================================
--- clang/docs/LanguageExtensions.rst
+++ clang/docs/LanguageExtensions.rst
@@ -2686,7 +2686,7 @@
.. code-block:: c
- size_t __builtin_coro_size()
+ size_t __builtin_coro_size(bool alloc)
void *__builtin_coro_frame()
void *__builtin_coro_free(void *coro_frame)
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits