Hi rsmith, rjmccall, fraggamuffin, ejstotzer,
Currently, if global variable is marked as a private OpenMP variable, the
compiler crashes in debug version or generates incorrect code in release
version. It happens because in the OpenMP region the original global variable
is used instead of the generated private copy. It happens because currently
globals variables are not captured in the OpenMP region.
This patch adds capturing of global variables iff private copy of the global
variable must be used in the OpenMP region.
http://reviews.llvm.org/D6259
Files:
include/clang/Sema/Sema.h
lib/CodeGen/CGExpr.cpp
lib/CodeGen/CodeGenFunction.h
lib/Sema/SemaExpr.cpp
lib/Sema/SemaOpenMP.cpp
test/OpenMP/parallel_firstprivate_codegen.cpp
test/OpenMP/parallel_private_codegen.cpp
Index: test/OpenMP/parallel_firstprivate_codegen.cpp
===================================================================
--- test/OpenMP/parallel_firstprivate_codegen.cpp
+++ test/OpenMP/parallel_firstprivate_codegen.cpp
@@ -1,6 +1,8 @@
// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -std=c++11 -triple %itanium_abi_triple -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple %itanium_abi_triple -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -std=c++11 -DLAMBDA -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
+// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -fblocks -DBLOCKS -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
@@ -12,7 +14,7 @@
~St() {}
};
-volatile int g;
+volatile int g = 1212;
template <class T>
struct S {
@@ -47,6 +49,83 @@
}
int main() {
+#ifdef LAMBDA
+ // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212,
+ // LAMBDA-LABEL: @main
+ // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
+ [&]() {
+ // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+ // LAMBDA: [[G_LOCAL_REF:%.+]] = getelementptr inbounds %{{.+}}* [[AGG_CAPTURED:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+ // LAMBDA: store i{{[0-9]+}}* [[G]], i{{[0-9]+}}** [[G_LOCAL_REF]]
+ // LAMBDA: [[ARG:%.+]] = bitcast %{{.+}}* [[AGG_CAPTURED]] to i8*
+ // LAMBDA: call void {{.+}}* @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* [[ARG]])
+#pragma omp parallel firstprivate(g)
+ {
+ // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* %{{.+}}, i32* %{{.+}}, %{{.+}}* [[ARG:%.+]])
+ // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}},
+ // LAMBDA: store %{{.+}}* [[ARG]], %{{.+}}** [[ARG_REF:%.+]],
+ // LAMBDA: [[ARG:%.+]] = load %{{.+}}** [[ARG_REF]]
+ // LAMBDA: [[G_REF_ADDR:%.+]] = getelementptr inbounds %{{.+}}* [[ARG]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+ // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}** [[G_REF_ADDR]]
+ // LAMBDA: [[G_VAL:%.+]] = load volatile i{{[0-9]+}}* [[G_REF]]
+ // LAMBDA: store volatile i{{[0-9]+}} [[G_VAL]], i{{[0-9]+}}* [[G_PRIVATE_ADDR]]
+ // LAMBDA: call void @__kmpc_barrier(
+ g = 1;
+ // LAMBDA: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]],
+ // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+ // LAMBDA: store i{{[0-9]+}}* [[G_PRIVATE_ADDR]], i{{[0-9]+}}** [[G_PRIVATE_ADDR_REF]]
+ // LAMBDA: call void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
+ [&]() {
+ // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+ // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+ g = 2;
+ // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}** [[ARG_PTR_REF]]
+ // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+ // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}** [[G_PTR_REF]]
+ // LAMBDA: store volatile i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]]
+ }();
+ }
+ }();
+ return 0;
+#elif defined(BLOCKS)
+ // BLOCKS: [[G:@.+]] = global i{{[0-9]+}} 1212,
+ // BLOCKS-LABEL: @main
+ // BLOCKS: call void {{%.+}}(i8*
+ ^{
+ // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8*
+ // BLOCKS: [[G_LOCAL_REF:%.+]] = getelementptr inbounds %{{.+}}* [[AGG_CAPTURED:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+ // BLOCKS: store i{{[0-9]+}}* [[G]], i{{[0-9]+}}** [[G_LOCAL_REF]]
+ // BLOCKS: [[ARG:%.+]] = bitcast %{{.+}}* [[AGG_CAPTURED]] to i8*
+ // BLOCKS: call void {{.+}}* @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* [[ARG]])
+#pragma omp parallel firstprivate(g)
+ {
+ // BLOCKS: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* %{{.+}}, i32* %{{.+}}, %{{.+}}* [[ARG:%.+]])
+ // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}},
+ // BLOCKS: store %{{.+}}* [[ARG]], %{{.+}}** [[ARG_REF:%.+]],
+ // BLOCKS: [[ARG:%.+]] = load %{{.+}}** [[ARG_REF]]
+ // BLOCKS: [[G_REF_ADDR:%.+]] = getelementptr inbounds %{{.+}}* [[ARG]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+ // BLOCKS: [[G_REF:%.+]] = load i{{[0-9]+}}** [[G_REF_ADDR]]
+ // BLOCKS: [[G_VAL:%.+]] = load volatile i{{[0-9]+}}* [[G_REF]]
+ // BLOCKS: store volatile i{{[0-9]+}} [[G_VAL]], i{{[0-9]+}}* [[G_PRIVATE_ADDR]]
+ // BLOCKS: call void @__kmpc_barrier(
+ g = 1;
+ // BLOCKS: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]],
+ // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
+ // BLOCKS: i{{[0-9]+}}* [[G_PRIVATE_ADDR]]
+ // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
+ // BLOCKS: call void {{%.+}}(i8*
+ ^{
+ // BLOCKS: define {{.+}} void {{@.+}}(i8*
+ g = 2;
+ // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
+ // BLOCKS: store volatile i{{[0-9]+}} 2, i{{[0-9]+}}*
+ // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
+ // BLOCKS: ret
+ }();
+ }
+ }();
+ return 0;
+#else
S<float> test;
int t_var = 0;
int vec[] = {1, 2};
@@ -58,6 +137,7 @@
s_arr[0] = var;
}
return tmain<int>();
+#endif
}
// CHECK: define {{.*}}i{{[0-9]+}} @main()
Index: test/OpenMP/parallel_private_codegen.cpp
===================================================================
--- test/OpenMP/parallel_private_codegen.cpp
+++ test/OpenMP/parallel_private_codegen.cpp
@@ -1,6 +1,8 @@
// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -std=c++11 -DLAMBDA -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
+// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c++ -fblocks -DBLOCKS -triple %itanium_abi_triple -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
// expected-no-diagnostics
#ifndef HEADER
#define HEADER
@@ -14,15 +16,17 @@
~S() {}
};
+volatile int g = 1212;
+
// CHECK: [[S_FLOAT_TY:%.+]] = type { float }
// CHECK: [[CAP_MAIN_TY:%.+]] = type { [2 x i{{[0-9]+}}]*, i{{[0-9]+}}*, [2 x [[S_FLOAT_TY]]]*, [[S_FLOAT_TY]]* }
// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
// CHECK: [[CAP_TMAIN_TY:%.+]] = type { [2 x i{{[0-9]+}}]*, i{{[0-9]+}}*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]* }
// CHECK: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8*
template <typename T>
T tmain() {
S<T> test;
- T t_var;
+ T t_var = T();
T vec[] = {1, 2};
S<T> s_arr[] = {1, 2};
S<T> var(3);
@@ -35,8 +39,75 @@
}
int main() {
+#ifdef LAMBDA
+ // LAMBDA: [[G:@.+]] = global i{{[0-9]+}} 1212,
+ // LAMBDA-LABEL: @main
+ // LAMBDA: call void [[OUTER_LAMBDA:@.+]](
+ [&]() {
+ // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+ // LAMBDA: [[G_LOCAL_REF:%.+]] = getelementptr inbounds %{{.+}}* [[AGG_CAPTURED:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+ // LAMBDA: store i{{[0-9]+}}* [[G]], i{{[0-9]+}}** [[G_LOCAL_REF]]
+ // LAMBDA: [[ARG:%.+]] = bitcast %{{.+}}* [[AGG_CAPTURED]] to i8*
+ // LAMBDA: call void {{.+}}* @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* [[ARG]])
+#pragma omp parallel private(g)
+ {
+ // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* %{{.+}}, i32* %{{.+}}, %{{.+}}* [[ARG:%.+]])
+ // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}},
+ // LAMBDA: store %{{.+}}* [[ARG]], %{{.+}}** [[ARG_REF:%.+]],
+ // LAMBDA: call void @__kmpc_barrier(
+ g = 1;
+ // LAMBDA: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]],
+ // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+ // LAMBDA: store i{{[0-9]+}}* [[G_PRIVATE_ADDR]], i{{[0-9]+}}** [[G_PRIVATE_ADDR_REF]]
+ // LAMBDA: call void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
+ [&]() {
+ // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+ // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+ g = 2;
+ // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}** [[ARG_PTR_REF]]
+ // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+ // LAMBDA: [[G_REF:%.+]] = load i{{[0-9]+}}** [[G_PTR_REF]]
+ // LAMBDA: store volatile i{{[0-9]+}} 2, i{{[0-9]+}}* [[G_REF]]
+ }();
+ }
+ }();
+ return 0;
+#elif defined(BLOCKS)
+ // BLOCKS: [[G:@.+]] = global i{{[0-9]+}} 1212,
+ // BLOCKS-LABEL: @main
+ // BLOCKS: call void {{%.+}}(i8*
+ ^{
+ // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8*
+ // BLOCKS: [[G_LOCAL_REF:%.+]] = getelementptr inbounds %{{.+}}* [[AGG_CAPTURED:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+ // BLOCKS: store i{{[0-9]+}}* [[G]], i{{[0-9]+}}** [[G_LOCAL_REF]]
+ // BLOCKS: [[ARG:%.+]] = bitcast %{{.+}}* [[AGG_CAPTURED]] to i8*
+ // BLOCKS: call void {{.+}}* @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* [[ARG]])
+#pragma omp parallel private(g)
+ {
+ // BLOCKS: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* %{{.+}}, i32* %{{.+}}, %{{.+}}* [[ARG:%.+]])
+ // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}},
+ // BLOCKS: store %{{.+}}* [[ARG]], %{{.+}}** [[ARG_REF:%.+]],
+ // BLOCKS: call void @__kmpc_barrier(
+ g = 1;
+ // BLOCKS: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]],
+ // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
+ // BLOCKS: i{{[0-9]+}}* [[G_PRIVATE_ADDR]]
+ // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
+ // BLOCKS: call void {{%.+}}(i8*
+ ^{
+ // BLOCKS: define {{.+}} void {{@.+}}(i8*
+ g = 2;
+ // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
+ // BLOCKS: store volatile i{{[0-9]+}} 2, i{{[0-9]+}}*
+ // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
+ // BLOCKS: ret
+ }();
+ }
+ }();
+ return 0;
+#else
S<float> test;
- int t_var;
+ int t_var = 0;
int vec[] = {1, 2};
S<float> s_arr[] = {1, 2};
S<float> var(3);
@@ -46,6 +117,7 @@
s_arr[0] = var;
}
return tmain<int>();
+#endif
}
// CHECK: define i{{[0-9]+}} @main()
Index: include/clang/Sema/Sema.h
===================================================================
--- include/clang/Sema/Sema.h
+++ include/clang/Sema/Sema.h
@@ -7454,6 +7454,10 @@
ExprResult VerifyPositiveIntegerConstantInClause(Expr *Op,
OpenMPClauseKind CKind);
public:
+ /// \brief Checks if the specified variable is used in one of the private
+ /// clauses in OpenMP constructs.
+ bool IsOpenMPPrivateVar(VarDecl *VD);
+
ExprResult PerformOpenMPImplicitIntegerConversion(SourceLocation OpLoc,
Expr *Op);
/// \brief Called on start of new data sharing attribute block.
Index: lib/Sema/SemaOpenMP.cpp
===================================================================
--- lib/Sema/SemaOpenMP.cpp
+++ lib/Sema/SemaOpenMP.cpp
@@ -551,6 +551,18 @@
#define DSAStack static_cast<DSAStackTy *>(VarDataSharingAttributesStack)
+bool Sema::IsOpenMPPrivateVar(VarDecl *VD) {
+ if (LangOpts.OpenMP && DSAStack->getCurrentDirective() != OMPD_unknown) {
+ auto DVarPrivate = DSAStack->getTopDSA(VD, /*FromParent=*/false);
+ if (DVarPrivate.CKind != OMPC_unknown && isOpenMPPrivate(DVarPrivate.CKind))
+ return true;
+ DVarPrivate = DSAStack->hasDSA(VD, isOpenMPPrivate, MatchesAlways(),
+ /*FromParent=*/false);
+ return DVarPrivate.CKind != OMPC_unknown;
+ }
+ return false;
+}
+
void Sema::DestroyDataSharingAttributesStack() { delete DSAStack; }
void Sema::StartOpenMPDSABlock(OpenMPDirectiveKind DKind,
Index: lib/Sema/SemaExpr.cpp
===================================================================
--- lib/Sema/SemaExpr.cpp
+++ lib/Sema/SemaExpr.cpp
@@ -11654,7 +11654,7 @@
const bool Diagnose, Sema &S) {
if (isa<BlockDecl>(DC) || isa<CapturedDecl>(DC) || isLambdaCallOperator(DC))
return getLambdaAwareParentOfDeclContext(DC);
- else {
+ else if (Var->hasLocalStorage()) {
if (Diagnose)
diagnoseUncapturableValueReference(S, Loc, Var, DC);
}
@@ -12090,6 +12090,10 @@
return true;
}
+static bool NeedToCaptureGlobalVariable(Sema &S, VarDecl *VD) {
+ return S.IsOpenMPPrivateVar(VD);
+}
+
bool Sema::tryCaptureVariable(VarDecl *Var, SourceLocation ExprLoc,
TryCaptureKind Kind, SourceLocation EllipsisLoc,
bool BuildAndDiagnose,
@@ -12115,7 +12119,8 @@
// If the variable is declared in the current context (and is not an
// init-capture), there is no need to capture it.
if (!Var->isInitCapture() && Var->getDeclContext() == DC) return true;
- if (!Var->hasLocalStorage()) return true;
+ if (!Var->hasLocalStorage() && !NeedToCaptureGlobalVariable(*this, Var))
+ return true;
// Walk up the stack to determine whether we can capture the variable,
// performing the "simple" checks that don't depend on type. We stop when
@@ -12136,8 +12141,14 @@
ExprLoc,
BuildAndDiagnose,
*this);
- if (!ParentDC) return true;
-
+ if (!ParentDC) {
+ if (!Var->hasLocalStorage()) {
+ FunctionScopesIndex = MaxFunctionScopesIndex - 1;
+ break;
+ }
+ return true;
+ }
+
FunctionScopeInfo *FSI = FunctionScopes[FunctionScopesIndex];
CapturingScopeInfo *CSI = cast<CapturingScopeInfo>(FSI);
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -614,7 +614,6 @@
addPrivate(const VarDecl *LocalVD,
const std::function<llvm::Value *()> &PrivateGen) {
assert(PerformCleanup && "adding private to dead scope");
- assert(LocalVD->isLocalVarDecl() && "privatizing non-local variable");
if (SavedLocals.count(LocalVD) > 0) return false;
SavedLocals[LocalVD] = CGF.LocalDeclMap.lookup(LocalVD);
CGF.LocalDeclMap.erase(LocalVD);
Index: lib/CodeGen/CGExpr.cpp
===================================================================
--- lib/CodeGen/CGExpr.cpp
+++ lib/CodeGen/CGExpr.cpp
@@ -1897,6 +1897,23 @@
QualType T = E->getType();
if (const auto *VD = dyn_cast<VarDecl>(ND)) {
+ // Check for captured globals.
+ if (!VD->hasLocalStorage()) {
+ if (FieldDecl *FD = LambdaCaptureFields.lookup(VD))
+ return EmitCapturedFieldLValue(*this, FD, CXXABIThisValue);
+ else if (CapturedStmtInfo) {
+ if (const FieldDecl *FD = CapturedStmtInfo->lookup(VD)) {
+ if (llvm::Value *V = LocalDeclMap.lookup(VD))
+ return MakeAddrLValue(V, T, Alignment);
+ else
+ return EmitCapturedFieldLValue(*this, FD,
+ CapturedStmtInfo->getContextValue());
+ }
+ } else if (auto *BD = dyn_cast_or_null<BlockDecl>(CurCodeDecl))
+ if (BD->capturesVariable(VD))
+ return MakeAddrLValue(
+ GetAddrOfBlockDecl(VD, VD->hasAttr<BlocksAttr>()), T, Alignment);
+ }
// Global Named registers access via intrinsics only
if (VD->getStorageClass() == SC_Register &&
VD->hasAttr<AsmLabelAttr>() && !VD->isLocalVarDecl())
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits