https://github.com/ddpagan created
https://github.com/llvm/llvm-project/pull/196431
Implement code generation for the OpenMP 6.0 declare_target 'local' clause,
which creates device-only variables with per-device static storage.
A 'local' variable exists in the device image with its static initializer and
is always accessed directly by device code. This is the same as 'to'/'enter'
without unified shared memory, except that no offload entry is registered.
Using 'device_type(nohost)' with 'local' is not yet supported. Sema generates a
warning and converts it to 'device_type(any)'.
Testing:
- Updated tests:
clang/test/OpenMP/declare_target_messages.cpp
clang/test/OpenMP/declare_target_ast_print.cpp
- New tests:
clang/test/OpenMP/declare_target_local_codegen.cpp
clang/test/OpenMP/declare_target_local_usm_codegen.cpp
offload/test/offloading/declare_target_local.cpp
>From ee5c6a801f3bac6f83a5a1171399776951b4cba4 Mon Sep 17 00:00:00 2001
From: Dave Pagan <[email protected]>
Date: Wed, 8 Apr 2026 17:47:28 -0500
Subject: [PATCH] [clang][OpenMP 6.0][CodeGen] Codegen for declare_target
'local' clause
Implement code generation for the OpenMP 6.0 declare_target 'local'
clause, which creates device-only variables with per-device static
storage.
A 'local' variable exists in the device image with its static
initializer and is always accessed directly by device code. This
is the same as 'to'/'enter' without unified shared memory, except
that no offload entry is registered.
Using 'device_type(nohost)' with 'local' is not yet supported. Sema
generates a warning and converts it to 'device_type(any)'.
Testing:
- Updated tests:
clang/test/OpenMP/declare_target_messages.cpp
clang/test/OpenMP/declare_target_ast_print.cpp
- New tests:
clang/test/OpenMP/declare_target_local_codegen.cpp
clang/test/OpenMP/declare_target_local_usm_codegen.cpp
offload/test/offloading/declare_target_local.cpp
---
.../clang/Basic/DiagnosticSemaKinds.td | 9 +-
clang/lib/CodeGen/CGExpr.cpp | 19 +-
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 26 +-
clang/lib/CodeGen/CodeGenModule.cpp | 11 +-
clang/lib/Sema/SemaOpenMP.cpp | 15 +-
.../test/OpenMP/declare_target_ast_print.cpp | 12 +-
.../OpenMP/declare_target_local_codegen.cpp | 430 ++++++++++++++++++
.../declare_target_local_usm_codegen.cpp | 52 +++
clang/test/OpenMP/declare_target_messages.cpp | 9 +-
.../test/offloading/declare_target_local.cpp | 40 ++
10 files changed, 577 insertions(+), 46 deletions(-)
create mode 100644 clang/test/OpenMP/declare_target_local_codegen.cpp
create mode 100644 clang/test/OpenMP/declare_target_local_usm_codegen.cpp
create mode 100644 offload/test/offloading/declare_target_local.cpp
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index c69b2ce3648f8..2e4102d9e1741 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12174,6 +12174,10 @@ def err_omp_declare_target_var_in_both_clauses
def err_omp_declare_target_local_host_only
: Error<"'local' clause is incompatible with 'device_type(host)'; "
"local variables exist only on the device">;
+def warn_omp_declare_target_local_nohost
+ : Warning<"'device_type(nohost)' is not yet supported with 'local' clause;
"
+ "treating as 'device_type(any)'">,
+ InGroup<OpenMPTarget>;
def warn_omp_not_in_target_context : Warning<
"declaration is not declared in any declare target region">,
InGroup<OpenMPTarget>;
@@ -12596,11 +12600,6 @@ def err_omp_declare_target_has_local_vars : Error<
def warn_omp_declare_target_after_first_use : Warning<
"declaration marked as declare target after first use, it may lead to
incorrect results">,
InGroup<OpenMPTarget>;
-def warn_omp_declare_target_local_not_implemented
- : Warning<"'local' clause on 'declare_target' directive is not yet fully "
- "implemented; "
- "variable will be treated as 'enter'">,
- InGroup<OpenMPTarget>;
def err_omp_declare_variant_incompat_attributes : Error<
"'#pragma omp declare variant' is not compatible with any target-specific
attributes">;
def warn_omp_declare_variant_score_not_constant
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 9107553652688..5764b59e538ae 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3360,19 +3360,18 @@ static Address
emitDeclTargetVarDeclLValue(CodeGenFunction &CGF,
const VarDecl *VD, QualType T) {
std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
- // Return an invalid address if variable is MT_To (or MT_Enter starting with
- // OpenMP 5.2, or MT_Local in OpenMP 6.0) and unified memory is not enabled.
- // For all other cases: MT_Link and MT_To (or MT_Enter/MT_Local) with unified
- // memory, return a valid address.
- if (!Res || ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
- *Res == OMPDeclareTargetDeclAttr::MT_Enter ||
- *Res == OMPDeclareTargetDeclAttr::MT_Local) &&
- !CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()))
+ // Always return an invalid address for MT_Local, and also for
+ // MT_To/MT_Enter when unified memory is not enabled. These use direct
+ // access (global exists in device image). Otherwise, return a valid
+ // address.
+ if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Local ||
+ ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
+ *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
+ !CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()))
return Address::invalid();
assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
((*Res == OMPDeclareTargetDeclAttr::MT_To ||
- *Res == OMPDeclareTargetDeclAttr::MT_Enter ||
- *Res == OMPDeclareTargetDeclAttr::MT_Local) &&
+ *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) &&
"Expected link clause OR to clause with unified memory enabled.");
QualType PtrTy = CGF.getContext().getPointerType(VD->getType());
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index a99a257c14a2a..7cdc206aea0c4 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1529,12 +1529,14 @@ convertCaptureClause(const VarDecl *VD) {
return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
break;
case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
- case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Local:
return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
- break;
case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
break;
+ case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Local:
+ // MT_Local variables don't need offload entry (device-local).
+ llvm_unreachable("MT_Local should not reach convertCaptureClause");
+ break;
default:
return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
break;
@@ -7983,8 +7985,7 @@ class MappableExprsHandler {
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
((*Res == OMPDeclareTargetDeclAttr::MT_To ||
- *Res == OMPDeclareTargetDeclAttr::MT_Enter ||
- *Res == OMPDeclareTargetDeclAttr::MT_Local) &&
+ *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
RequiresReference = true;
BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
@@ -11326,8 +11327,7 @@ bool
CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
cast<VarDecl>(GD.getDecl()));
if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
((*Res == OMPDeclareTargetDeclAttr::MT_To ||
- *Res == OMPDeclareTargetDeclAttr::MT_Enter ||
- *Res == OMPDeclareTargetDeclAttr::MT_Local) &&
+ *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
HasRequiresUnifiedSharedMemory)) {
DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
return true;
@@ -11350,6 +11350,11 @@ void
CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
VD->hasExternalStorage())
return;
+ // MT_Local variables use direct access with no host-device mapping.
+ // No offload entry needed — the device global keeps its own initializer.
+ if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Local)
+ return;
+
if (!Res) {
if (CGM.getLangOpts().OpenMPIsTargetDevice) {
// Register non-target variables being emitted in device code (debug info
@@ -11396,10 +11401,11 @@ void CGOpenMPRuntime::emitDeferredTargetDecls() const
{
OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
if (!Res)
continue;
- if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
- *Res == OMPDeclareTargetDeclAttr::MT_Enter ||
- *Res == OMPDeclareTargetDeclAttr::MT_Local) &&
- !HasRequiresUnifiedSharedMemory) {
+ // MT_Local and MT_To/MT_Enter without USM are always emitted.
+ if (*Res == OMPDeclareTargetDeclAttr::MT_Local ||
+ ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
+ *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
+ !HasRequiresUnifiedSharedMemory)) {
CGM.EmitGlobal(VD);
} else {
assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp
b/clang/lib/CodeGen/CodeGenModule.cpp
index e88fed1296667..5b26332765fab 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -4465,16 +4465,15 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
bool UnifiedMemoryEnabled =
getOpenMPRuntime().hasRequiresUnifiedSharedMemory();
- if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
- *Res == OMPDeclareTargetDeclAttr::MT_Enter ||
- *Res == OMPDeclareTargetDeclAttr::MT_Local) &&
- !UnifiedMemoryEnabled) {
+ if (*Res == OMPDeclareTargetDeclAttr::MT_Local ||
+ ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
+ *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
+ !UnifiedMemoryEnabled)) {
(void)GetAddrOfGlobalVar(VD);
} else {
assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
((*Res == OMPDeclareTargetDeclAttr::MT_To ||
- *Res == OMPDeclareTargetDeclAttr::MT_Enter ||
- *Res == OMPDeclareTargetDeclAttr::MT_Local) &&
+ *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
UnifiedMemoryEnabled)) &&
"Link clause or to clause with unified memory expected.");
(void)getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 53ded7a5e177e..d6f6bc919a31b 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -24859,13 +24859,18 @@ void SemaOpenMP::ActOnOpenMPDeclareTargetName(
if (!IndirectE)
IsIndirect = true;
}
- // FIXME: 'local' clause is not yet implemented in CodeGen. For now, it is
- // treated as 'enter'. For host compilation, 'local' is a no-op.
+ // FIXME: 'local' with 'device_type(nohost)' is not yet fully supported
+ // in codegen. Treat as 'device_type(any)' for now. The variable will
+ // exist on both host and device, but the host copy is unused.
+ auto DT = DTCI.DT;
if (MT == OMPDeclareTargetDeclAttr::MT_Local &&
- getLangOpts().OpenMPIsTargetDevice)
- Diag(Loc, diag::warn_omp_declare_target_local_not_implemented);
+ DT == OMPDeclareTargetDeclAttr::DT_NoHost) {
+ Diag(Loc, diag::warn_omp_declare_target_local_nohost);
+ DT = OMPDeclareTargetDeclAttr::DT_Any;
+ }
+
auto *A = OMPDeclareTargetDeclAttr::CreateImplicit(
- getASTContext(), MT, DTCI.DT, IndirectE, IsIndirect, Level,
+ getASTContext(), MT, DT, IndirectE, IsIndirect, Level,
SourceRange(Loc, Loc));
ND->addAttr(A);
if (ASTMutationListener *ML = getASTContext().getASTMutationListener())
diff --git a/clang/test/OpenMP/declare_target_ast_print.cpp
b/clang/test/OpenMP/declare_target_ast_print.cpp
index 3ebe261cf79f0..7b63c15dd455e 100644
--- a/clang/test/OpenMP/declare_target_ast_print.cpp
+++ b/clang/test/OpenMP/declare_target_ast_print.cpp
@@ -4,7 +4,7 @@
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -I %S/Inputs
-ast-print %s | FileCheck %s --check-prefix=CHECK --check-prefix=OMP50
// RUN: %clang_cc1 -verify -fopenmp -I %S/Inputs -ast-print %s | FileCheck %s
--check-prefix=CHECK --check-prefix=OMP51
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=52 -I %S/Inputs
-ast-print %s | FileCheck %s --check-prefix=CHECK --check-prefix=OMP52
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -I %S/Inputs
-ast-print %s | FileCheck %s --check-prefix=CHECK --check-prefix=OMP60
+// RUN: %clang_cc1 -verify=omp60 -fopenmp -fopenmp-version=60 -I %S/Inputs
-ast-print %s | FileCheck %s --check-prefix=CHECK --check-prefix=OMP60
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -I %S/Inputs
-emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -std=c++11 -include-pch %t -I
%S/Inputs -verify %s -ast-print | FileCheck %s --check-prefix=CHECK
--check-prefix=OMP50
@@ -133,18 +133,18 @@ int l1;
// OMP60: #pragma omp end declare target
int l2;
-#pragma omp declare target device_type(nohost) local(l2)
-// OMP60: #pragma omp declare target device_type(nohost) local
+#pragma omp declare target device_type(nohost) local(l2) // omp60-warning
{{'device_type(nohost)' is not yet supported with 'local' clause; treating as
'device_type(any)'}}
+// OMP60: #pragma omp declare target local
// OMP60: int l2;
// OMP60: #pragma omp end declare target
int l3;
int a = 0;
-#pragma omp declare target local(l3) device_type(nohost) local(a)
-// OMP60: #pragma omp declare target device_type(nohost) local
+#pragma omp declare target local(l3) device_type(nohost) local(a) //
omp60-warning 2 {{'device_type(nohost)' is not yet supported with 'local'
clause; treating as 'device_type(any)'}}
+// OMP60: #pragma omp declare target local
// OMP60: int l3;
// OMP60: #pragma omp end declare target
-// OMP60: #pragma omp declare target device_type(nohost) local
+// OMP60: #pragma omp declare target local
// OMP60: int a = 0;
// OMP60: #pragma omp end declare target
diff --git a/clang/test/OpenMP/declare_target_local_codegen.cpp
b/clang/test/OpenMP/declare_target_local_codegen.cpp
new file mode 100644
index 0000000000000..b82e8b3bba9ff
--- /dev/null
+++ b/clang/test/OpenMP/declare_target_local_codegen.cpp
@@ -0,0 +1,430 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex
"__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]"
"pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
+// RUN: %clang_cc1 -verify=omp60 -fopenmp -fopenmp-version=60 -x c++ -triple
powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o
- | FileCheck %s --check-prefix=HOST
+// RUN: %clang_cc1 -verify=omp60 -fopenmp -fopenmp-version=60 -x c++ -triple
powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s
-o %t-host.bc
+// RUN: %clang_cc1 -verify=omp60 -fopenmp -fopenmp-version=60 -x c++ -triple
amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device
-fvisibility=protected -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck
%s --check-prefix=DEVICE
+// RUN: %clang_cc1 -verify=omp60 -fopenmp -fopenmp-version=60 -x c++ -triple
amdgcn-amd-amdhsa %s -fopenmp-is-target-device -fvisibility=protected
-fopenmp-host-ir-file-path %t-host.bc -emit-pch -o %t
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -x c++ -triple
amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device
-fvisibility=protected -fopenmp-host-ir-file-path %t-host.bc -include-pch %t -o
- | FileCheck %s --check-prefix=DEVICE
+
+#ifndef HEADER
+#define HEADER
+
+// ---------------------------------------------------------------------------
+// Explicit local clause (default device_type is 'any')
+// ---------------------------------------------------------------------------
+int local_scalar;
+#pragma omp declare target local(local_scalar)
+
+int local_array[64];
+#pragma omp declare target local(local_array)
+
+// ---------------------------------------------------------------------------
+// local + device_type(nohost)
+// ---------------------------------------------------------------------------
+int local_nohost_var;
+#pragma omp declare target local(local_nohost_var) device_type(nohost) //
omp60-warning {{'device_type(nohost)' is not yet supported with 'local' clause;
treating as 'device_type(any)'}}
+
+double local_nohost_arr[32];
+#pragma omp declare target local(local_nohost_arr) device_type(nohost) //
omp60-warning {{'device_type(nohost)' is not yet supported with 'local' clause;
treating as 'device_type(any)'}}
+
+// ---------------------------------------------------------------------------
+// Template with local variable
+// ---------------------------------------------------------------------------
+template <typename T>
+struct LocalStorage {
+ static T value;
+};
+
+template <typename T>
+T LocalStorage<T>::value;
+
+#pragma omp declare target local(LocalStorage<int>::value)
+#pragma omp declare target local(LocalStorage<double>::value)
+
+#pragma omp begin declare target
+template <typename T>
+T read_local_storage() {
+ return LocalStorage<T>::value;
+}
+#pragma omp end declare target
+
+// ---------------------------------------------------------------------------
+// Non-template static data member with local
+// ---------------------------------------------------------------------------
+struct PlainStruct {
+ static int s_member;
+};
+int PlainStruct::s_member;
+#pragma omp declare target local(PlainStruct::s_member)
+
+// ---------------------------------------------------------------------------
+// Initialized local variable
+// ---------------------------------------------------------------------------
+int local_init_var = 42;
+#pragma omp declare target local(local_init_var)
+
+// ---------------------------------------------------------------------------
+// Use local variables in a target region
+// ---------------------------------------------------------------------------
+int use_local_vars() {
+ int result = 0;
+ #pragma omp target map(from: result)
+ {
+ local_scalar = 42;
+ local_array[0] = 1;
+ LocalStorage<int>::value = 100;
+ result = local_scalar + local_array[0]
+ + read_local_storage<int>();
+ }
+ return result;
+}
+
+// ---------------------------------------------------------------------------
+// Use nohost local variables in a target region
+// ---------------------------------------------------------------------------
+int use_nohost_local_vars() {
+ int result = 0;
+ #pragma omp target map(from: result)
+ {
+ local_nohost_var = 7;
+ result = local_nohost_var;
+ }
+ return result;
+}
+
+// ---------------------------------------------------------------------------
+// Use static data member, initialized var, and static local in target region
+// ---------------------------------------------------------------------------
+int use_new_local_vars() {
+ int result = 0;
+ #pragma omp target map(from: result)
+ {
+ PlainStruct::s_member = 55;
+ local_init_var = 77;
+ result = PlainStruct::s_member + local_init_var;
+ }
+ return result;
+}
+
+#endif
+// HOST-LABEL: define {{[^@]+}}@_Z14use_local_varsv
+// HOST-SAME: () #[[ATTR0:[0-9]+]] {
+// HOST-NEXT: entry:
+// HOST-NEXT: [[RESULT:%.*]] = alloca i32, align 4
+// HOST-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 8
+// HOST-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8
+// HOST-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8
+// HOST-NEXT: [[KERNEL_ARGS:%.*]] = alloca
[[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// HOST-NEXT: store i32 0, ptr [[RESULT]], align 4
+// HOST-NEXT: [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// HOST-NEXT: store ptr [[RESULT]], ptr [[TMP0]], align 8
+// HOST-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr
[[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// HOST-NEXT: store ptr [[RESULT]], ptr [[TMP1]], align 8
+// HOST-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr
[[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// HOST-NEXT: store ptr null, ptr [[TMP2]], align 8
+// HOST-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// HOST-NEXT: store ptr null, ptr [[TMP3]], align 8
+// HOST-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr
[[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// HOST-NEXT: store ptr null, ptr [[TMP4]], align 8
+// HOST-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr
[[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// HOST-NEXT: store ptr null, ptr [[TMP5]], align 8
+// HOST-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// HOST-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr
[[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// HOST-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// HOST-NEXT: store i32 4, ptr [[TMP8]], align 4
+// HOST-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// HOST-NEXT: store i32 2, ptr [[TMP9]], align 4
+// HOST-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// HOST-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8
+// HOST-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// HOST-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8
+// HOST-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// HOST-NEXT: store ptr @.offload_sizes, ptr [[TMP12]], align 8
+// HOST-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// HOST-NEXT: store ptr @.offload_maptypes, ptr [[TMP13]], align 8
+// HOST-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// HOST-NEXT: store ptr null, ptr [[TMP14]], align 8
+// HOST-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// HOST-NEXT: store ptr null, ptr [[TMP15]], align 8
+// HOST-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// HOST-NEXT: store i64 0, ptr [[TMP16]], align 8
+// HOST-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// HOST-NEXT: store i64 0, ptr [[TMP17]], align 8
+// HOST-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// HOST-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP18]], align 4
+// HOST-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// HOST-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP19]], align 4
+// HOST-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// HOST-NEXT: store i32 0, ptr [[TMP20]], align 4
+// HOST-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr
@[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr
@.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14use_local_varsv_l70.region_id,
ptr [[KERNEL_ARGS]])
+// HOST-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
+// HOST-NEXT: br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label
[[OMP_OFFLOAD_CONT:%.*]]
+// HOST: omp_offload.failed:
+// HOST-NEXT: call void
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14use_local_varsv_l70(ptr
[[RESULT]], ptr null) #[[ATTR2:[0-9]+]]
+// HOST-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// HOST: omp_offload.cont:
+// HOST-NEXT: [[TMP23:%.*]] = load i32, ptr [[RESULT]], align 4
+// HOST-NEXT: ret i32 [[TMP23]]
+//
+//
+// HOST-LABEL: define
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14use_local_varsv_l70
+// HOST-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[RESULT:%.*]],
ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR1:[0-9]+]] {
+// HOST-NEXT: entry:
+// HOST-NEXT: [[RESULT_ADDR:%.*]] = alloca ptr, align 8
+// HOST-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
+// HOST-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR]], align 8
+// HOST-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[RESULT_ADDR]], align 8,
!nonnull [[META8:![0-9]+]], !align [[META9:![0-9]+]]
+// HOST-NEXT: store i32 42, ptr @local_scalar, align 4
+// HOST-NEXT: store i32 1, ptr @local_array, align 4
+// HOST-NEXT: store i32 100, ptr @_ZN12LocalStorageIiE5valueE, align 4
+// HOST-NEXT: [[TMP1:%.*]] = load i32, ptr @local_scalar, align 4
+// HOST-NEXT: [[TMP2:%.*]] = load i32, ptr @local_array, align 4
+// HOST-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
+// HOST-NEXT: [[CALL:%.*]] = call noundef signext i32
@_Z18read_local_storageIiET_v()
+// HOST-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD]], [[CALL]]
+// HOST-NEXT: store i32 [[ADD1]], ptr [[TMP0]], align 4
+// HOST-NEXT: ret void
+//
+//
+// HOST-LABEL: define {{[^@]+}}@_Z18read_local_storageIiET_v
+// HOST-SAME: () #[[ATTR0]] comdat {
+// HOST-NEXT: entry:
+// HOST-NEXT: [[TMP0:%.*]] = load i32, ptr @_ZN12LocalStorageIiE5valueE,
align 4
+// HOST-NEXT: ret i32 [[TMP0]]
+//
+//
+// HOST-LABEL: define {{[^@]+}}@_Z21use_nohost_local_varsv
+// HOST-SAME: () #[[ATTR0]] {
+// HOST-NEXT: entry:
+// HOST-NEXT: [[RESULT:%.*]] = alloca i32, align 4
+// HOST-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 8
+// HOST-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8
+// HOST-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8
+// HOST-NEXT: [[KERNEL_ARGS:%.*]] = alloca
[[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// HOST-NEXT: store i32 0, ptr [[RESULT]], align 4
+// HOST-NEXT: [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// HOST-NEXT: store ptr [[RESULT]], ptr [[TMP0]], align 8
+// HOST-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr
[[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// HOST-NEXT: store ptr [[RESULT]], ptr [[TMP1]], align 8
+// HOST-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr
[[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// HOST-NEXT: store ptr null, ptr [[TMP2]], align 8
+// HOST-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// HOST-NEXT: store ptr null, ptr [[TMP3]], align 8
+// HOST-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr
[[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// HOST-NEXT: store ptr null, ptr [[TMP4]], align 8
+// HOST-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr
[[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// HOST-NEXT: store ptr null, ptr [[TMP5]], align 8
+// HOST-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// HOST-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr
[[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// HOST-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// HOST-NEXT: store i32 4, ptr [[TMP8]], align 4
+// HOST-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// HOST-NEXT: store i32 2, ptr [[TMP9]], align 4
+// HOST-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// HOST-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8
+// HOST-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// HOST-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8
+// HOST-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// HOST-NEXT: store ptr @.offload_sizes.1, ptr [[TMP12]], align 8
+// HOST-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// HOST-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP13]], align 8
+// HOST-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// HOST-NEXT: store ptr null, ptr [[TMP14]], align 8
+// HOST-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// HOST-NEXT: store ptr null, ptr [[TMP15]], align 8
+// HOST-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// HOST-NEXT: store i64 0, ptr [[TMP16]], align 8
+// HOST-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// HOST-NEXT: store i64 0, ptr [[TMP17]], align 8
+// HOST-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// HOST-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP18]], align 4
+// HOST-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// HOST-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP19]], align 4
+// HOST-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// HOST-NEXT: store i32 0, ptr [[TMP20]], align 4
+// HOST-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]],
i64 -1, i32 -1, i32 0, ptr
@.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21use_nohost_local_varsv_l86.region_id,
ptr [[KERNEL_ARGS]])
+// HOST-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
+// HOST-NEXT: br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label
[[OMP_OFFLOAD_CONT:%.*]]
+// HOST: omp_offload.failed:
+// HOST-NEXT: call void
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21use_nohost_local_varsv_l86(ptr
[[RESULT]], ptr null) #[[ATTR2]]
+// HOST-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// HOST: omp_offload.cont:
+// HOST-NEXT: [[TMP23:%.*]] = load i32, ptr [[RESULT]], align 4
+// HOST-NEXT: ret i32 [[TMP23]]
+//
+//
+// HOST-LABEL: define
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21use_nohost_local_varsv_l86
+// HOST-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[RESULT:%.*]],
ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR1]] {
+// HOST-NEXT: entry:
+// HOST-NEXT: [[RESULT_ADDR:%.*]] = alloca ptr, align 8
+// HOST-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
+// HOST-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR]], align 8
+// HOST-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[RESULT_ADDR]], align 8,
!nonnull [[META8]], !align [[META9]]
+// HOST-NEXT: store i32 7, ptr @local_nohost_var, align 4
+// HOST-NEXT: [[TMP1:%.*]] = load i32, ptr @local_nohost_var, align 4
+// HOST-NEXT: store i32 [[TMP1]], ptr [[TMP0]], align 4
+// HOST-NEXT: ret void
+//
+//
+// HOST-LABEL: define {{[^@]+}}@_Z18use_new_local_varsv
+// HOST-SAME: () #[[ATTR0]] {
+// HOST-NEXT: entry:
+// HOST-NEXT: [[RESULT:%.*]] = alloca i32, align 4
+// HOST-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 8
+// HOST-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8
+// HOST-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8
+// HOST-NEXT: [[KERNEL_ARGS:%.*]] = alloca
[[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// HOST-NEXT: store i32 0, ptr [[RESULT]], align 4
+// HOST-NEXT: [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// HOST-NEXT: store ptr [[RESULT]], ptr [[TMP0]], align 8
+// HOST-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr
[[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// HOST-NEXT: store ptr [[RESULT]], ptr [[TMP1]], align 8
+// HOST-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr
[[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// HOST-NEXT: store ptr null, ptr [[TMP2]], align 8
+// HOST-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// HOST-NEXT: store ptr null, ptr [[TMP3]], align 8
+// HOST-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr
[[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// HOST-NEXT: store ptr null, ptr [[TMP4]], align 8
+// HOST-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr
[[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// HOST-NEXT: store ptr null, ptr [[TMP5]], align 8
+// HOST-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// HOST-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr
[[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// HOST-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// HOST-NEXT: store i32 4, ptr [[TMP8]], align 4
+// HOST-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// HOST-NEXT: store i32 2, ptr [[TMP9]], align 4
+// HOST-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// HOST-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8
+// HOST-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// HOST-NEXT: store ptr [[TMP7]], ptr [[TMP11]], align 8
+// HOST-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// HOST-NEXT: store ptr @.offload_sizes.3, ptr [[TMP12]], align 8
+// HOST-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// HOST-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP13]], align 8
+// HOST-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// HOST-NEXT: store ptr null, ptr [[TMP14]], align 8
+// HOST-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// HOST-NEXT: store ptr null, ptr [[TMP15]], align 8
+// HOST-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// HOST-NEXT: store i64 0, ptr [[TMP16]], align 8
+// HOST-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// HOST-NEXT: store i64 0, ptr [[TMP17]], align 8
+// HOST-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// HOST-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP18]], align 4
+// HOST-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// HOST-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP19]], align 4
+// HOST-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// HOST-NEXT: store i32 0, ptr [[TMP20]], align 4
+// HOST-NEXT: [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]],
i64 -1, i32 -1, i32 0, ptr
@.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18use_new_local_varsv_l99.region_id,
ptr [[KERNEL_ARGS]])
+// HOST-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
+// HOST-NEXT: br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label
[[OMP_OFFLOAD_CONT:%.*]]
+// HOST: omp_offload.failed:
+// HOST-NEXT: call void
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18use_new_local_varsv_l99(ptr
[[RESULT]], ptr null) #[[ATTR2]]
+// HOST-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// HOST: omp_offload.cont:
+// HOST-NEXT: [[TMP23:%.*]] = load i32, ptr [[RESULT]], align 4
+// HOST-NEXT: ret i32 [[TMP23]]
+//
+//
+// HOST-LABEL: define
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18use_new_local_varsv_l99
+// HOST-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[RESULT:%.*]],
ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR1]] {
+// HOST-NEXT: entry:
+// HOST-NEXT: [[RESULT_ADDR:%.*]] = alloca ptr, align 8
+// HOST-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
+// HOST-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR]], align 8
+// HOST-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// HOST-NEXT: [[TMP0:%.*]] = load ptr, ptr [[RESULT_ADDR]], align 8,
!nonnull [[META8]], !align [[META9]]
+// HOST-NEXT: store i32 55, ptr @_ZN11PlainStruct8s_memberE, align 4
+// HOST-NEXT: store i32 77, ptr @local_init_var, align 4
+// HOST-NEXT: [[TMP1:%.*]] = load i32, ptr @_ZN11PlainStruct8s_memberE,
align 4
+// HOST-NEXT: [[TMP2:%.*]] = load i32, ptr @local_init_var, align 4
+// HOST-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
+// HOST-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4
+// HOST-NEXT: ret void
+//
+//
+// DEVICE-LABEL: define
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14use_local_varsv_l70
+// DEVICE-SAME: (ptr noundef nonnull align 4 dereferenceable(4)
[[RESULT:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
+// DEVICE-NEXT: entry:
+// DEVICE-NEXT: [[RESULT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// DEVICE-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// DEVICE-NEXT: [[RESULT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5)
[[RESULT_ADDR]] to ptr
+// DEVICE-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr
addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// DEVICE-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR_ASCAST]], align 8
+// DEVICE-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// DEVICE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[RESULT_ADDR_ASCAST]], align
8, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]]
+// DEVICE-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr
addrspacecast (ptr addrspace(1)
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14use_local_varsv_l70_kernel_environment
to ptr), ptr [[DYN_PTR]])
+// DEVICE-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
+// DEVICE-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]],
label [[WORKER_EXIT:%.*]]
+// DEVICE: user_code.entry:
+// DEVICE-NEXT: store i32 42, ptr addrspacecast (ptr addrspace(1)
@local_scalar to ptr), align 4
+// DEVICE-NEXT: store i32 1, ptr addrspacecast (ptr addrspace(1)
@local_array to ptr), align 4
+// DEVICE-NEXT: store i32 100, ptr addrspacecast (ptr addrspace(1)
@_ZN12LocalStorageIiE5valueE to ptr), align 4
+// DEVICE-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr
addrspace(1) @local_scalar to ptr), align 4
+// DEVICE-NEXT: [[TMP3:%.*]] = load i32, ptr addrspacecast (ptr
addrspace(1) @local_array to ptr), align 4
+// DEVICE-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
+// DEVICE-NEXT: [[CALL:%.*]] = call noundef i32
@_Z18read_local_storageIiET_v() #[[ATTR2:[0-9]+]]
+// DEVICE-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD]], [[CALL]]
+// DEVICE-NEXT: store i32 [[ADD1]], ptr [[TMP0]], align 4
+// DEVICE-NEXT: call void @__kmpc_target_deinit()
+// DEVICE-NEXT: ret void
+// DEVICE: worker.exit:
+// DEVICE-NEXT: ret void
+//
+//
+// DEVICE-LABEL: define {{[^@]+}}@_Z18read_local_storageIiET_v
+// DEVICE-SAME: () #[[ATTR1:[0-9]+]] comdat {
+// DEVICE-NEXT: entry:
+// DEVICE-NEXT: [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr
addrspace(1) @_ZN12LocalStorageIiE5valueE to ptr), align 4
+// DEVICE-NEXT: ret i32 [[TMP0]]
+//
+//
+// DEVICE-LABEL: define
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21use_nohost_local_varsv_l86
+// DEVICE-SAME: (ptr noundef nonnull align 4 dereferenceable(4)
[[RESULT:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
+// DEVICE-NEXT: entry:
+// DEVICE-NEXT: [[RESULT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// DEVICE-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// DEVICE-NEXT: [[RESULT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5)
[[RESULT_ADDR]] to ptr
+// DEVICE-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr
addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// DEVICE-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR_ASCAST]], align 8
+// DEVICE-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// DEVICE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[RESULT_ADDR_ASCAST]], align
8, !nonnull [[META7]], !align [[META8]]
+// DEVICE-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr
addrspacecast (ptr addrspace(1)
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21use_nohost_local_varsv_l86_kernel_environment
to ptr), ptr [[DYN_PTR]])
+// DEVICE-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
+// DEVICE-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]],
label [[WORKER_EXIT:%.*]]
+// DEVICE: user_code.entry:
+// DEVICE-NEXT: store i32 7, ptr addrspacecast (ptr addrspace(1)
@local_nohost_var to ptr), align 4
+// DEVICE-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr
addrspace(1) @local_nohost_var to ptr), align 4
+// DEVICE-NEXT: store i32 [[TMP2]], ptr [[TMP0]], align 4
+// DEVICE-NEXT: call void @__kmpc_target_deinit()
+// DEVICE-NEXT: ret void
+// DEVICE: worker.exit:
+// DEVICE-NEXT: ret void
+//
+//
+// DEVICE-LABEL: define
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18use_new_local_varsv_l99
+// DEVICE-SAME: (ptr noundef nonnull align 4 dereferenceable(4)
[[RESULT:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
+// DEVICE-NEXT: entry:
+// DEVICE-NEXT: [[RESULT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// DEVICE-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// DEVICE-NEXT: [[RESULT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5)
[[RESULT_ADDR]] to ptr
+// DEVICE-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr
addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// DEVICE-NEXT: store ptr [[RESULT]], ptr [[RESULT_ADDR_ASCAST]], align 8
+// DEVICE-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// DEVICE-NEXT: [[TMP0:%.*]] = load ptr, ptr [[RESULT_ADDR_ASCAST]], align
8, !nonnull [[META7]], !align [[META8]]
+// DEVICE-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr
addrspacecast (ptr addrspace(1)
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18use_new_local_varsv_l99_kernel_environment
to ptr), ptr [[DYN_PTR]])
+// DEVICE-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
+// DEVICE-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]],
label [[WORKER_EXIT:%.*]]
+// DEVICE: user_code.entry:
+// DEVICE-NEXT: store i32 55, ptr addrspacecast (ptr addrspace(1)
@_ZN11PlainStruct8s_memberE to ptr), align 4
+// DEVICE-NEXT: store i32 77, ptr addrspacecast (ptr addrspace(1)
@local_init_var to ptr), align 4
+// DEVICE-NEXT: [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr
addrspace(1) @_ZN11PlainStruct8s_memberE to ptr), align 4
+// DEVICE-NEXT: [[TMP3:%.*]] = load i32, ptr addrspacecast (ptr
addrspace(1) @local_init_var to ptr), align 4
+// DEVICE-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
+// DEVICE-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4
+// DEVICE-NEXT: call void @__kmpc_target_deinit()
+// DEVICE-NEXT: ret void
+// DEVICE: worker.exit:
+// DEVICE-NEXT: ret void
+//
diff --git a/clang/test/OpenMP/declare_target_local_usm_codegen.cpp
b/clang/test/OpenMP/declare_target_local_usm_codegen.cpp
new file mode 100644
index 0000000000000..d97d6f409d265
--- /dev/null
+++ b/clang/test/OpenMP/declare_target_local_usm_codegen.cpp
@@ -0,0 +1,52 @@
+// Test that declare target local variables are NOT affected by
+// unified_shared_memory. Local variables always use direct access
+// (no offload entry, no _decl_tgt_ref_ptr) regardless of USM. For
+// comparison, enter variables with USM use pointer-reference indirection
+// when normally they would also be direct access.
+//
+// CHECK lines not auto-generated because they are specifically verifying
+// absence of ref ptr and offload entry for local variable and, by contrast,
+// presence of ref ptr and offload entry for enter variable.
+
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -x c++ -triple
powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o
- | FileCheck %s --check-prefix=HOST
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -x c++ -triple
powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s
-o %t-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -x c++ -triple
amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device
-fvisibility=protected -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck
%s --check-prefix=DEVICE
+
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+#pragma omp requires unified_shared_memory
+
+int local_var;
+#pragma omp declare target local(local_var)
+
+int enter_var;
+#pragma omp declare target enter(enter_var)
+
+// local_var: direct access, no ref ptr, no offload entry
+// HOST-DAG: @local_var = global i32 0
+// HOST-NOT: @local_var_decl_tgt_ref_ptr
+
+// enter_var with USM: pointer-reference indirection
+// HOST-DAG: @enter_var_decl_tgt_ref_ptr = weak global ptr @enter_var
+// HOST-DAG: @.offloading.entry.enter_var_decl_tgt_ref_ptr = weak constant
%struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr
@enter_var_decl_tgt_ref_ptr, ptr @.offloading.entry_name{{.*}}, i64 8, i64 0,
ptr null }, section "llvm_offload_entries"
+
+// Device: local_var is a direct global, enter_var uses ref ptr
+// DEVICE-DAG: @local_var = protected addrspace(1) global i32 0
+// DEVICE-NOT: @local_var_decl_tgt_ref_ptr
+// DEVICE-DAG: @enter_var_decl_tgt_ref_ptr = weak global ptr null
+
+int use_vars() {
+ int result = 0;
+#pragma omp target map(from: result)
+ {
+ local_var = 42;
+ enter_var = 10;
+ result = local_var + enter_var;
+ }
+ return result;
+}
+
+#endif
diff --git a/clang/test/OpenMP/declare_target_messages.cpp
b/clang/test/OpenMP/declare_target_messages.cpp
index 9875bd95141fd..6fe477755dbe7 100644
--- a/clang/test/OpenMP/declare_target_messages.cpp
+++ b/clang/test/OpenMP/declare_target_messages.cpp
@@ -152,10 +152,14 @@ void func() {}
// expected-note@+1 {{'func_local' defined here}}
void func_local() {}
-// dev60-warning@+3 {{'local' clause on 'declare_target' directive is not yet
fully implemented; variable will be treated as 'enter'}}
// omp60-error@+2 {{unexpected 'allocate' clause, only 'enter', 'link',
'device_type', 'indirect' or 'local' clauses expected}}
// expected-error@+1 {{function name is not allowed in 'local' clause}}
#pragma omp declare target local(func_local) allocate(a)
+
+// omp60-error@+1 {{unexpected 'local' clause, only 'device_type', 'indirect'
clauses expected}}
+#pragma omp begin declare target local
+int begin_local_var;
+#pragma omp end declare target
#endif // _OPENMP
void bar();
@@ -335,7 +339,6 @@ int y_enter_local;
#pragma omp declare target local(y_enter_local)
int y_local_enter;
-// dev60-warning@+1 {{'local' clause on 'declare_target' directive is not yet
fully implemented; variable will be treated as 'enter'}}
#pragma omp declare target local(y_local_enter)
// expected-error@+1 {{'y_local_enter' must not appear in both clauses 'local'
and 'enter'}}
#pragma omp declare target enter(y_local_enter)
@@ -346,7 +349,6 @@ int y_link_local;
#pragma omp declare target local(y_link_local)
int y_local_link;
-// dev60-warning@+1 {{'local' clause on 'declare_target' directive is not yet
fully implemented; variable will be treated as 'enter'}}
#pragma omp declare target local(y_local_link)
// expected-error@+1 {{'y_local_link' must not appear in both clauses 'local'
and 'link'}}
#pragma omp declare target link(y_local_link)
@@ -466,7 +468,6 @@ int MultiDevTy;
#pragma omp declare target to(MultiDevTy) device_type(nohost)
int counter = 0;
-// dev60-warning@+9 {{'local' clause on 'declare_target' directive is not yet
fully implemented; variable will be treated as 'enter'}}
// omp52-error@+8 {{unexpected 'local' clause, only 'enter', 'link',
'device_type' or 'indirect' clauses expected}}
// omp52-error@+7 {{expected at least one 'enter', 'link' or 'indirect'
clause}}
// omp51-error@+6 {{unexpected 'local' clause, only 'to', 'link',
'device_type' or 'indirect' clauses expected}}
diff --git a/offload/test/offloading/declare_target_local.cpp
b/offload/test/offloading/declare_target_local.cpp
new file mode 100644
index 0000000000000..9bff4ea38a407
--- /dev/null
+++ b/offload/test/offloading/declare_target_local.cpp
@@ -0,0 +1,40 @@
+// clang-format off
+// RUN: %libomptarget-compilexx-generic -fopenmp-version=60
+// RUN: %libomptarget-run-generic | %fcheck-generic
+// RUN: %libomptarget-compileoptxx-generic -fopenmp-version=60
+// RUN: %libomptarget-run-generic | %fcheck-generic
+// clang-format on
+
+// Sanity test for OpenMP 6.0 declare target 'local' clause.
+// Verify 'local' variable has device-local storage, that it has
+// correct initial value, persists across target regions, and is
+// independent from host copy.
+
+#include <cstdio>
+int local_var = 42;
+#pragma omp declare target local(local_var)
+
+int main() {
+ // Device should get the initializer value.
+ int init = -1;
+#pragma omp target map(from : init)
+ init = local_var;
+
+ // Device write should persist to a second region.
+ int persist = -1;
+#pragma omp target
+ local_var = 100;
+#pragma omp target map(from : persist)
+ persist = local_var;
+
+ // Host copy should not be affected by device write. Should
+ // retain original value.
+
+ // CHECK: PASS
+ if (init == 42 && persist == 100 && local_var == 42)
+ printf("PASS\n");
+ else
+ printf("FAIL init=%d persist=%d host=%d\n", init, persist, local_var);
+
+ return (init == 42 && persist == 100 && local_var == 42) ? 0 : 1;
+}
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits