Hi chandlerc, rsmith, rjmccall, dberlin,
This is part of the series started by D9375, and is the first Clang patch in
that series. It teaches Clang to emit llvm.noalias calls and noalias metadata
to represent local restrict-qualified pointers.
There are a few design considerations:
First, a local restrict-qualified pointer can have multiple values assigned to
it, and they all need to fall into the same "noalias scope".
Second, the set of local scopes relevant to a given block cannot be determined
without first visiting all restrict-qualified declarations and generating their
corresponding scope MDNodes. Because the aliasing assumptions apply
"retroactively" (to all accesses within the block, not just those sequenced
after the declaration statement), we need to generate all MDNodes before
applying the metadata. When we visit the declaration as the variable is being
emitted we have an opportunity to record the address so that we can wrap all
assigned values with an llvm.noalias call.
In this implementation, I'm generating the metadata nodes when we visit the
decl, at the point where the local address is created (but before the
initializer is evaluated), and adding the address to a map of addresses; any
value stored to one of those addresses is wrapped in an llvm.noalias call. To
apply the metadata, we need to record all memory instructions in each block
(and parent blocks, etc.). To avoid incurring this expense when no
restrict-qualified decls are present, I have a pre-check that enables this
recording of memory accesses (a small recursive visitor that looks for
restrict-qualified decls).
After we're done emitting a block, we iterate over the memory-access
instructions generated for it, and add metadata for all relevant noalias scopes
of variables within that block.
http://reviews.llvm.org/D9403
Files:
lib/CodeGen/CGDecl.cpp
lib/CodeGen/CGExpr.cpp
lib/CodeGen/CGStmt.cpp
lib/CodeGen/CodeGenFunction.cpp
lib/CodeGen/CodeGenFunction.h
test/CodeGen/noalias.c
EMAIL PREFERENCES
http://reviews.llvm.org/settings/panel/emailpreferences/
Index: lib/CodeGen/CGDecl.cpp
===================================================================
--- lib/CodeGen/CGDecl.cpp
+++ lib/CodeGen/CGDecl.cpp
@@ -27,6 +27,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Type.h"
using namespace clang;
using namespace CodeGen;
@@ -843,6 +844,7 @@
/// These turn into simple stack objects, or GlobalValues depending on target.
void CodeGenFunction::EmitAutoVarDecl(const VarDecl &D) {
AutoVarEmission emission = EmitAutoVarAlloca(D);
+ EmitAutoVarNoAlias(emission); // before init, so the initializing store is wrapped too
EmitAutoVarInit(emission);
EmitAutoVarCleanups(emission);
}
@@ -1196,6 +1198,47 @@
}
}
+void CodeGenFunction::EmitAutoVarNoAlias(const AutoVarEmission &emission) {
+ assert(emission.Variable && "emission was not valid!");
+
+ // Don't create noalias scopes unless we're optimizing.
+ if (CGM.getCodeGenOpts().OptimizationLevel == 0)
+ return;
+
+ const VarDecl &D = *emission.Variable;
+ QualType type = D.getType();
+
+ // Only restrict-qualified variables get a noalias scope.
+ if (!type.isRestrictQualified())
+ return;
+
+ llvm::MDBuilder MDB(CurFn->getContext());
+ if (!NoAliasDomain) // created lazily on the first restrict-qualified variable
+ NoAliasDomain = MDB.createAnonymousAliasScopeDomain(CurFn->getName());
+
+ std::string Name = CurFn->getName(); // scope name: "<function>: <variable>"
+ Name += ": ";
+ Name += D.getName();
+
+ llvm::MDNode *Scope =
+ MDB.createAnonymousAliasScope(NoAliasDomain, Name);
+ addNoAliasScope(Scope); // current lexical scope, else the function-level info
+
+ SmallVector<llvm::Metadata *, 8> ScopeListEntries(1, Scope);
+ llvm::MDNode *ScopeList =
+ llvm::MDNode::get(CurFn->getContext(), ScopeListEntries);
+
+ // Key the map on emission.Address when this is a byref variable that is
+ // captured by its own initializer, and on the object address otherwise
+ // (presumably matching the address EmitAutoVarInit stores to -- confirm).
+ const Expr *Init = D.getInit();
+ bool capturedByInit = emission.IsByRef && isCapturedBy(D, Init);
+
+ llvm::Value *Loc =
+ capturedByInit ? emission.Address : emission.getObjectAddress(*this);
+ NoAliasAddrMap[Loc] = ScopeList;
+}
+
/// Emit an expression as an initializer for a variable at the given
/// location. The expression is not necessarily the normal
/// initializer for the variable, and the address is not necessarily
Index: lib/CodeGen/CGExpr.cpp
===================================================================
--- lib/CodeGen/CGExpr.cpp
+++ lib/CodeGen/CGExpr.cpp
@@ -1280,6 +1280,10 @@
Value = EmitToMemory(Value, Ty);
+ auto NAI = NoAliasAddrMap.find(Addr);
+ if (NAI != NoAliasAddrMap.end())
+ Value = Builder.CreateNoAlias(Value, NAI->second);
+
if (Ty->isAtomicType() ||
(!isInit && typeIsSuitableForInlineAtomic(Ty, Volatile))) {
EmitAtomicStore(RValue::get(Value),
Index: lib/CodeGen/CGStmt.cpp
===================================================================
--- lib/CodeGen/CGStmt.cpp
+++ lib/CodeGen/CGStmt.cpp
@@ -16,6 +16,7 @@
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/StmtVisitor.h"
+#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Sema/LoopHint.h"
@@ -260,6 +261,40 @@
return true;
}
+namespace {
+/// Looks for a restrict-qualified variable declaration in a statement; memory
+/// instructions must be recorded for any scope that declares one.
+struct RestrictFinder : RecursiveASTVisitor<RestrictFinder> {
+ bool FoundRestrictDecl;
+ RestrictFinder() : FoundRestrictDecl(false) {}
+
+ // Blocks and lambdas are handled as separate functions, so we need not
+ // traverse them in the parent context.
+ bool TraverseBlockExpr(BlockExpr *BE) { return true; }
+ bool TraverseLambdaBody(LambdaExpr *LE) { return true; }
+ bool TraverseCapturedStmt(CapturedStmt *CS) { return true; }
+
+ bool VisitVarDecl(VarDecl *VD) {
+ if (VD->getType().isRestrictQualified())
+ FoundRestrictDecl = true;
+ // Returning false aborts the traversal, so keep going until a decl is found.
+ return !FoundRestrictDecl;
+ }
+};
+}
+
+bool CodeGenFunction::hasLocalRestrictVars(const CompoundStmt &S) {
+ // We may have restrict-qualified variables, but if we're not optimizing, we
+ // don't do anything special with them.
+ if (CGM.getCodeGenOpts().OptimizationLevel == 0)
+ return false;
+
+ RestrictFinder Finder;
+ // TraverseStmt takes a non-const Stmt pointer, hence the const_cast.
+ Finder.TraverseStmt(const_cast<CompoundStmt *>(&S));
+ return Finder.FoundRestrictDecl;
+}
+
/// EmitCompoundStmt - Emit a compound statement {..} node. If GetLast is true,
/// this captures the expression result of the last sub-statement and returns it
/// (for use by the statement expression extension).
@@ -269,7 +304,7 @@
"LLVM IR generation of compound statement ('{}')");
// Keep track of the current cleanup stack depth, including debug scopes.
- LexicalScope Scope(*this, S.getSourceRange());
+ LexicalScope Scope(*this, S.getSourceRange(), hasLocalRestrictVars(S));
return EmitCompoundStmtWithoutScope(S, GetLast, AggSlot);
}
@@ -450,6 +485,23 @@
}
}
+void CodeGenFunction::LexicalNoAliasInfo::addNoAliasMD() {
+ if (MemoryInsts.empty() || NoAliasScopes.empty()) // nothing to annotate
+ return;
+
+ llvm::MDNode *NewScopeList =
+ llvm::MDNode::get(MemoryInsts[0]->getParent()->getContext(),
+ NoAliasScopes);
+
+ for (auto &I : MemoryInsts) // append to any !noalias list already attached
+ I->setMetadata(
+ llvm::LLVMContext::MD_noalias,
+ llvm::MDNode::concatenate(I->getMetadata(
+ llvm::LLVMContext::MD_noalias),
+ NewScopeList));
+
+ MemoryInsts.clear(); // so a repeated call does not annotate them again
+}
void CodeGenFunction::EmitLabelStmt(const LabelStmt &S) {
EmitLabel(S.getDecl());
Index: lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- lib/CodeGen/CodeGenFunction.cpp
+++ lib/CodeGen/CodeGenFunction.cpp
@@ -54,7 +54,7 @@
CXXDefaultInitExprThis(nullptr), CXXStructorImplicitParamDecl(nullptr),
CXXStructorImplicitParamValue(nullptr), OutermostConditional(nullptr),
CurLexicalScope(nullptr), TerminateLandingPad(nullptr),
- TerminateHandler(nullptr), TrapBB(nullptr) {
+ TerminateHandler(nullptr), TrapBB(nullptr), NoAliasDomain(nullptr) {
if (!suppressNewContext)
CGM.getCXXABI().getMangleContext().startNewFunction();
@@ -774,10 +774,16 @@
void CodeGenFunction::EmitFunctionBody(FunctionArgList &Args,
const Stmt *Body) {
incrementProfileCounter(Body);
- if (const CompoundStmt *S = dyn_cast<CompoundStmt>(Body))
+ if (const CompoundStmt *S = dyn_cast<CompoundStmt>(Body)) {
+ if (hasLocalRestrictVars(*S))
+ FnNoAliasInfo.recordMemoryInsts();
+
EmitCompoundStmtWithoutScope(*S);
- else
+
+ FnNoAliasInfo.addNoAliasMD();
+ } else {
EmitStmt(Body);
+ }
}
/// When instrumenting to collect profile data, the counts for some blocks
@@ -1731,10 +1737,13 @@
void CodeGenFunction::InsertHelper(llvm::Instruction *I,
const llvm::Twine &Name,
llvm::BasicBlock *BB,
- llvm::BasicBlock::iterator InsertPt) const {
+ llvm::BasicBlock::iterator InsertPt) {
LoopStack.InsertHelper(I);
if (IsSanitizerScope)
CGM.getSanitizerMetadata()->disableSanitizerForInstruction(I);
+
+ if (I->mayReadOrWriteMemory())
+ recordMemoryInstruction(I);
}
template <bool PreserveNames>
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -138,7 +138,7 @@
/// instruction is created using Builder.
void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name,
llvm::BasicBlock *BB,
- llvm::BasicBlock::iterator InsertPt) const;
+ llvm::BasicBlock::iterator InsertPt);
/// CurFuncDecl - Holds the Decl for the current outermost
/// non-closure context.
@@ -488,7 +488,34 @@
}
};
- class LexicalScope : public RunCleanupsScope {
+ bool hasLocalRestrictVars(const CompoundStmt &S);
+
+ struct LexicalNoAliasInfo {
+ bool RecordMemoryInsts; // when false, recordMemoryInstruction() does nothing
+ SmallVector<llvm::Instruction *, 8> MemoryInsts; // recorded, not yet annotated
+ SmallVector<llvm::Metadata *, 4> NoAliasScopes; // added via addNoAliasScope()
+
+ LexicalNoAliasInfo(bool RMI = false) : RecordMemoryInsts(RMI) {}
+
+ void recordMemoryInsts() { // turn recording on; nothing here turns it off
+ RecordMemoryInsts = true;
+ }
+
+ void recordMemoryInstruction(llvm::Instruction *I) {
+ if (RecordMemoryInsts)
+ MemoryInsts.push_back(I);
+ }
+
+ void addNoAliasScope(llvm::MDNode *Scope) {
+ assert(RecordMemoryInsts &&
+ "Adding noalias scope but not recording memory accesses!");
+ NoAliasScopes.push_back(Scope);
+ }
+
+ void addNoAliasMD(); // attach NoAliasScopes as !noalias to every recorded inst
+ } FnNoAliasInfo; // function-level info, used when no LexicalScope is active
+
+ class LexicalScope : public RunCleanupsScope, public LexicalNoAliasInfo {
SourceRange Range;
SmallVector<const LabelDecl*, 4> Labels;
LexicalScope *ParentScope;
@@ -498,8 +525,10 @@
public:
/// \brief Enter a new cleanup scope.
- explicit LexicalScope(CodeGenFunction &CGF, SourceRange Range)
- : RunCleanupsScope(CGF), Range(Range), ParentScope(CGF.CurLexicalScope) {
+ explicit LexicalScope(CodeGenFunction &CGF, SourceRange Range,
+ bool RMI = false)
+ : RunCleanupsScope(CGF), LexicalNoAliasInfo(RMI), Range(Range),
+ ParentScope(CGF.CurLexicalScope) {
CGF.CurLexicalScope = this;
if (CGDebugInfo *DI = CGF.getDebugInfo())
DI->EmitLexicalBlockStart(CGF.Builder, Range.getBegin());
@@ -510,6 +539,14 @@
Labels.push_back(label);
}
+ void recordMemoryInstruction(llvm::Instruction *I) {
+ LexicalNoAliasInfo::recordMemoryInstruction(I); // this scope, if recording
+ if (ParentScope) // then every enclosing scope in turn
+ ParentScope->recordMemoryInstruction(I);
+ else // outermost scope: hand off to the function-level info
+ CGF.FnNoAliasInfo.recordMemoryInstruction(I);
+ }
+
/// \brief Exit this cleanup scope, emitting any accumulated
/// cleanups.
~LexicalScope() {
@@ -527,6 +564,8 @@
/// \brief Force the emission of cleanups now, instead of waiting
/// until this object is destroyed.
void ForceCleanup() {
+ addNoAliasMD();
+
CGF.CurLexicalScope = ParentScope;
RunCleanupsScope::ForceCleanup();
@@ -537,6 +576,20 @@
void rescopeLabels();
};
+ void recordMemoryInstruction(llvm::Instruction *I) {
+ if (CurLexicalScope) // the scope forwards the instruction up its parent chain
+ CurLexicalScope->recordMemoryInstruction(I);
+ else // no scope active: record at function level only
+ FnNoAliasInfo.recordMemoryInstruction(I);
+ }
+
+ void addNoAliasScope(llvm::MDNode *Scope) {
+ if (CurLexicalScope) // current scope only; asserts that it is recording
+ CurLexicalScope->addNoAliasScope(Scope);
+ else // no scope active: the scope belongs to the function-level info
+ FnNoAliasInfo.addNoAliasScope(Scope);
+ }
+
/// \brief The scope used to remap some variables as private in the OpenMP
/// loop body (or other captured region emitted without outlining), and to
/// restore old vars back on exit.
@@ -1040,6 +1093,12 @@
void EmitOpenCLKernelMetadata(const FunctionDecl *FD,
llvm::Function *Fn);
+ /// The noalias domain metadata for this function.
+ llvm::MDNode* NoAliasDomain;
+ /// A map between the addresses of local restrict-qualified variables and
+ /// their noalias scope.
+ llvm::DenseMap<llvm::Value *, llvm::MDNode*> NoAliasAddrMap;
+
public:
CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext=false);
~CodeGenFunction();
@@ -1924,6 +1983,8 @@
void emitAutoVarTypeCleanup(const AutoVarEmission &emission,
QualType::DestructionKind dtorKind);
+ void EmitAutoVarNoAlias(const AutoVarEmission &emission);
+
void EmitStaticVarDecl(const VarDecl &D,
llvm::GlobalValue::LinkageTypes Linkage);
Index: test/CodeGen/noalias.c
===================================================================
--- /dev/null
+++ test/CodeGen/noalias.c
@@ -0,0 +1,83 @@
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 -disable-llvm-optzns %s -emit-llvm -o - | FileCheck %s
+
+int r;
+void ex1(int *);
+
+int *a;
+int *foo() {
+ int * restrict x = a;
+ return x;
+
+// CHECK-LABEL: define i32* @foo()
+// CHECK: [[x_addr_foo1:%[a-z0-9_.]+]] = alloca i32*
+// CHECK: [[x_foo1:%[a-z0-9_.]+]] = load i32*, i32** @a{{.*}}, !noalias [[TAG_foo1:!.*]]
+// CHECK: [[x_a_foo1:%[a-z0-9_.]+]] = call i32* @llvm.noalias.p0i32(i32* [[x_foo1]], metadata [[TAG_foo1]])
+// CHECK: store i32* [[x_a_foo1]], i32** [[x_addr_foo1]]{{.*}}, !noalias [[TAG_foo1]]
+}
+
+int *a2;
+int *foo1(int b) {
+ int * restrict x;
+
+// CHECK-LABEL: define i32* @foo1(i32 %b)
+// CHECK: [[x_addr_foo2:%[a-z0-9_.]+]] = alloca i32*
+// CHECK: [[x2_addr_foo2:%[a-z0-9_.]+]] = alloca i32*
+
+ if (b) {
+ x = a;
+ r += *x;
+ ex1(x);
+
+// CHECK: [[x_foo2:%[a-z0-9_.]+]] = load i32*, i32** @a{{.*}}, !noalias [[x_x2_tag_foo2:!.*]]
+// CHECK: [[x_a_foo2:%[a-z0-9_.]+]] = call i32* @llvm.noalias.p0i32(i32* [[x_foo2]], metadata [[x_tag_foo2:!.*]])
+// CHECK: store i32* [[x_a_foo2]], i32** [[x_addr_foo2]]{{.*}}, !noalias [[x_x2_tag_foo2]]
+// CHECK: call void @ex1
+ ++x;
+ *x = r;
+ ex1(x);
+
+// CHECK: [[old_x_foo2:%[a-z0-9_.]+]] = load i32*, i32** [[x_addr_foo2]]{{.*}}, !noalias [[x_x2_tag_foo2]]
+// CHECK: [[x_foo2:%[a-z0-9_.]+]] = getelementptr inbounds i32, i32* [[old_x_foo2]], i32 1
+// CHECK: [[x_a_foo2:%[a-z0-9_.]+]] = call i32* @llvm.noalias.p0i32(i32* [[x_foo2]], metadata [[x_tag_foo2]])
+// CHECK: store i32* [[x_a_foo2]], i32** [[x_addr_foo2]]{{.*}}, !noalias [[x_x2_tag_foo2]]
+// CHECK: call void @ex1
+
+ x += b;
+ *x = r;
+ ex1(x);
+
+// CHECK: [[old_x_foo2:%[a-z0-9_.]+]] = load i32*, i32** [[x_addr_foo2]]{{.*}}, !noalias [[x_x2_tag_foo2]]
+// CHECK: [[x_foo2:%[a-z0-9_.]+]] = getelementptr inbounds i32, i32* [[old_x_foo2]], i64
+// CHECK: [[x_a_foo2:%[a-z0-9_.]+]] = call i32* @llvm.noalias.p0i32(i32* [[x_foo2]], metadata [[x_tag_foo2]])
+// CHECK: store i32* [[x_a_foo2]], i32** [[x_addr_foo2]]{{.*}}, !noalias [[x_x2_tag_foo2]]
+// CHECK: call void @ex1
+
+ int * restrict x2 = a2;
+ *x2 = r;
+ ex1(x2);
+
+// CHECK: [[x2_foo2:%[a-z0-9_.]+]] = load i32*, i32** @a2{{.*}}, !noalias [[x_x2_tag_foo2]]
+// CHECK: [[x2_a_foo2:%[a-z0-9_.]+]] = call i32* @llvm.noalias.p0i32(i32* [[x2_foo2]], metadata [[x2_tag_foo2:!.*]])
+// CHECK: store i32* [[x2_a_foo2]], i32** [[x2_addr_foo2]]{{.*}}, !noalias [[x_x2_tag_foo2]]
+// CHECK: call void @ex1
+ } else {
+ x = a2;
+ r += *x;
+
+// CHECK: [[x_foo2:%[a-z0-9_.]+]] = load i32*, i32** @a2{{.*}}, !noalias [[x_tag_foo2]]
+// CHECK: [[x_a_foo2:%[a-z0-9_.]+]] = call i32* @llvm.noalias.p0i32(i32* [[x_foo2]], metadata [[x_tag_foo2]])
+// CHECK: store i32* [[x_a_foo2]], i32** [[x_addr_foo2]]{{.*}}, !noalias [[x_tag_foo2]]
+ }
+
+ return x;
+}
+
+int *bar() {
+ int * x = a;
+ return x;
+
+// CHECK-LABEL: define i32* @bar()
+// CHECK-NOT: noalias
+// CHECK: ret i32*
+}
+
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits