https://github.com/ddpagan created 
https://github.com/llvm/llvm-project/pull/196431

Implement code generation for the OpenMP 6.0 declare_target 'local' clause, 
which creates device-only variables with per-device static storage.

A 'local' variable exists in the device image with its static initializer and 
is always accessed directly by device code. This is the same as 'to'/'enter' 
without unified shared memory, except that no offload entry is registered.

Using 'device_type(nohost)' with 'local' is not yet supported. Sema generates a 
warning and converts it to 'device_type(any)'.

Testing:
- Updated tests:
     clang/test/OpenMP/declare_target_messages.cpp
     clang/test/OpenMP/declare_target_ast_print.cpp
- New tests:
     clang/test/OpenMP/declare_target_local_codegen.cpp
     clang/test/OpenMP/declare_target_local_usm_codegen.cpp
     offload/test/offloading/declare_target_local.cpp

>From ee5c6a801f3bac6f83a5a1171399776951b4cba4 Mon Sep 17 00:00:00 2001
From: Dave Pagan <[email protected]>
Date: Wed, 8 Apr 2026 17:47:28 -0500
Subject: [PATCH] [clang][OpenMP 6.0][CodeGen] Codegen for declare_target
 'local' clause

Implement code generation for the OpenMP 6.0 declare_target 'local'
clause, which creates device-only variables with per-device static
storage.

A 'local' variable exists in the device image with its static
initializer and is always accessed directly by device code. This
is the same as 'to'/'enter' without unified shared memory, except
that no offload entry is registered.

Using 'device_type(nohost)' with 'local' is not yet supported. Sema
generates a warning and converts it to 'device_type(any)'.

Testing:
- Updated tests:
    clang/test/OpenMP/declare_target_messages.cpp
    clang/test/OpenMP/declare_target_ast_print.cpp
- New tests:
    clang/test/OpenMP/declare_target_local_codegen.cpp
    clang/test/OpenMP/declare_target_local_usm_codegen.cpp
    offload/test/offloading/declare_target_local.cpp
---
 .../clang/Basic/DiagnosticSemaKinds.td        |   9 +-
 clang/lib/CodeGen/CGExpr.cpp                  |  19 +-
 clang/lib/CodeGen/CGOpenMPRuntime.cpp         |  26 +-
 clang/lib/CodeGen/CodeGenModule.cpp           |  11 +-
 clang/lib/Sema/SemaOpenMP.cpp                 |  15 +-
 .../test/OpenMP/declare_target_ast_print.cpp  |  12 +-
 .../OpenMP/declare_target_local_codegen.cpp   | 430 ++++++++++++++++++
 .../declare_target_local_usm_codegen.cpp      |  52 +++
 clang/test/OpenMP/declare_target_messages.cpp |   9 +-
 .../test/offloading/declare_target_local.cpp  |  40 ++
 10 files changed, 577 insertions(+), 46 deletions(-)
 create mode 100644 clang/test/OpenMP/declare_target_local_codegen.cpp
 create mode 100644 clang/test/OpenMP/declare_target_local_usm_codegen.cpp
 create mode 100644 offload/test/offloading/declare_target_local.cpp

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index c69b2ce3648f8..2e4102d9e1741 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12174,6 +12174,10 @@ def err_omp_declare_target_var_in_both_clauses
 def err_omp_declare_target_local_host_only
     : Error<"'local' clause is incompatible with 'device_type(host)'; "
             "local variables exist only on the device">;
+def warn_omp_declare_target_local_nohost
+    : Warning<"'device_type(nohost)' is not yet supported with 'local' clause; 
"
+              "treating as 'device_type(any)'">,
+      InGroup<OpenMPTarget>;
 def warn_omp_not_in_target_context : Warning<
   "declaration is not declared in any declare target region">,
   InGroup<OpenMPTarget>;
@@ -12596,11 +12600,6 @@ def err_omp_declare_target_has_local_vars : Error<
 def warn_omp_declare_target_after_first_use : Warning<
   "declaration marked as declare target after first use, it may lead to 
incorrect results">,
   InGroup<OpenMPTarget>;
-def warn_omp_declare_target_local_not_implemented
-    : Warning<"'local' clause on 'declare_target' directive is not yet fully "
-              "implemented; "
-              "variable will be treated as 'enter'">,
-      InGroup<OpenMPTarget>;
 def err_omp_declare_variant_incompat_attributes : Error<
   "'#pragma omp declare variant' is not compatible with any target-specific 
attributes">;
 def warn_omp_declare_variant_score_not_constant
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 9107553652688..5764b59e538ae 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3360,19 +3360,18 @@ static Address 
emitDeclTargetVarDeclLValue(CodeGenFunction &CGF,
                                            const VarDecl *VD, QualType T) {
   std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
       OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
-  // Return an invalid address if variable is MT_To (or MT_Enter starting with
-  // OpenMP 5.2, or MT_Local in OpenMP 6.0) and unified memory is not enabled.
-  // For all other cases: MT_Link and MT_To (or MT_Enter/MT_Local) with unified
-  // memory, return a valid address.
-  if (!Res || ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
-                *Res == OMPDeclareTargetDeclAttr::MT_Enter ||
-                *Res == OMPDeclareTargetDeclAttr::MT_Local) &&
-               !CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()))
+  // Always return an invalid address for MT_Local, and also for
+  // MT_To/MT_Enter when unified memory is not enabled. These use direct
+  // access (global exists in device image). Otherwise, return a valid
+  // address.
+  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Local ||
+      ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
+        *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
+       !CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()))
     return Address::invalid();
   assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
           ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
-            *Res == OMPDeclareTargetDeclAttr::MT_Enter ||
-            *Res == OMPDeclareTargetDeclAttr::MT_Local) &&
+            *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
            CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) &&
          "Expected link clause OR to clause with unified memory enabled.");
   QualType PtrTy = CGF.getContext().getPointerType(VD->getType());
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp 
b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index a99a257c14a2a..7cdc206aea0c4 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1529,12 +1529,14 @@ convertCaptureClause(const VarDecl *VD) {
     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
     break;
   case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
-  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Local:
     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
-    break;
   case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
     break;
+  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Local:
+    // MT_Local variables don't need offload entry (device-local).
+    llvm_unreachable("MT_Local should not reach convertCaptureClause");
+    break;
   default:
     return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
     break;
@@ -7983,8 +7985,7 @@ class MappableExprsHandler {
                 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
           if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
               ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
-                *Res == OMPDeclareTargetDeclAttr::MT_Enter ||
-                *Res == OMPDeclareTargetDeclAttr::MT_Local) &&
+                *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
                CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
             RequiresReference = true;
             BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
@@ -11326,8 +11327,7 @@ bool 
CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
           cast<VarDecl>(GD.getDecl()));
   if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
       ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
-        *Res == OMPDeclareTargetDeclAttr::MT_Enter ||
-        *Res == OMPDeclareTargetDeclAttr::MT_Local) &&
+        *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
        HasRequiresUnifiedSharedMemory)) {
     DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
     return true;
@@ -11350,6 +11350,11 @@ void 
CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
       VD->hasExternalStorage())
     return;
 
+  // MT_Local variables use direct access with no host-device mapping.
+  // No offload entry needed — the device global keeps its own initializer.
+  if (Res && *Res == OMPDeclareTargetDeclAttr::MT_Local)
+    return;
+
   if (!Res) {
     if (CGM.getLangOpts().OpenMPIsTargetDevice) {
       // Register non-target variables being emitted in device code (debug info
@@ -11396,10 +11401,11 @@ void CGOpenMPRuntime::emitDeferredTargetDecls() const 
{
         OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
     if (!Res)
       continue;
-    if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
-         *Res == OMPDeclareTargetDeclAttr::MT_Enter ||
-         *Res == OMPDeclareTargetDeclAttr::MT_Local) &&
-        !HasRequiresUnifiedSharedMemory) {
+    // MT_Local and MT_To/MT_Enter without USM are always emitted.
+    if (*Res == OMPDeclareTargetDeclAttr::MT_Local ||
+        ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
+          *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
+         !HasRequiresUnifiedSharedMemory)) {
       CGM.EmitGlobal(VD);
     } else {
       assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp 
b/clang/lib/CodeGen/CodeGenModule.cpp
index e88fed1296667..5b26332765fab 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -4465,16 +4465,15 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
 
           bool UnifiedMemoryEnabled =
               getOpenMPRuntime().hasRequiresUnifiedSharedMemory();
-          if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
-               *Res == OMPDeclareTargetDeclAttr::MT_Enter ||
-               *Res == OMPDeclareTargetDeclAttr::MT_Local) &&
-              !UnifiedMemoryEnabled) {
+          if (*Res == OMPDeclareTargetDeclAttr::MT_Local ||
+              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
+                *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
+               !UnifiedMemoryEnabled)) {
             (void)GetAddrOfGlobalVar(VD);
           } else {
             assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
                     ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
-                      *Res == OMPDeclareTargetDeclAttr::MT_Enter ||
-                      *Res == OMPDeclareTargetDeclAttr::MT_Local) &&
+                      *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
                      UnifiedMemoryEnabled)) &&
                    "Link clause or to clause with unified memory expected.");
             (void)getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 53ded7a5e177e..d6f6bc919a31b 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -24859,13 +24859,18 @@ void SemaOpenMP::ActOnOpenMPDeclareTargetName(
     if (!IndirectE)
       IsIndirect = true;
   }
-  // FIXME: 'local' clause is not yet implemented in CodeGen. For now, it is
-  // treated as 'enter'. For host compilation, 'local' is a no-op.
+  // FIXME: 'local' with 'device_type(nohost)' is not yet fully supported
+  // in codegen. Treat as 'device_type(any)' for now. The variable will
+  // exist on both host and device, but the host copy is unused.
+  auto DT = DTCI.DT;
   if (MT == OMPDeclareTargetDeclAttr::MT_Local &&
-      getLangOpts().OpenMPIsTargetDevice)
-    Diag(Loc, diag::warn_omp_declare_target_local_not_implemented);
+      DT == OMPDeclareTargetDeclAttr::DT_NoHost) {
+    Diag(Loc, diag::warn_omp_declare_target_local_nohost);
+    DT = OMPDeclareTargetDeclAttr::DT_Any;
+  }
+
   auto *A = OMPDeclareTargetDeclAttr::CreateImplicit(
-      getASTContext(), MT, DTCI.DT, IndirectE, IsIndirect, Level,
+      getASTContext(), MT, DT, IndirectE, IsIndirect, Level,
       SourceRange(Loc, Loc));
   ND->addAttr(A);
   if (ASTMutationListener *ML = getASTContext().getASTMutationListener())
diff --git a/clang/test/OpenMP/declare_target_ast_print.cpp 
b/clang/test/OpenMP/declare_target_ast_print.cpp
index 3ebe261cf79f0..7b63c15dd455e 100644
--- a/clang/test/OpenMP/declare_target_ast_print.cpp
+++ b/clang/test/OpenMP/declare_target_ast_print.cpp
@@ -4,7 +4,7 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -I %S/Inputs 
-ast-print %s | FileCheck %s --check-prefix=CHECK --check-prefix=OMP50
 // RUN: %clang_cc1 -verify -fopenmp -I %S/Inputs -ast-print %s | FileCheck %s 
--check-prefix=CHECK --check-prefix=OMP51
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=52 -I %S/Inputs 
-ast-print %s | FileCheck %s --check-prefix=CHECK --check-prefix=OMP52
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -I %S/Inputs 
-ast-print %s | FileCheck %s --check-prefix=CHECK --check-prefix=OMP60
+// RUN: %clang_cc1 -verify=omp60 -fopenmp -fopenmp-version=60 -I %S/Inputs 
-ast-print %s | FileCheck %s --check-prefix=CHECK --check-prefix=OMP60
 
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -I %S/Inputs 
-emit-pch -o %t %s
 // RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -std=c++11 -include-pch %t -I 
%S/Inputs -verify %s -ast-print | FileCheck %s --check-prefix=CHECK 
--check-prefix=OMP50
@@ -133,18 +133,18 @@ int l1;
 // OMP60: #pragma omp end declare target
 
 int l2;
-#pragma omp declare target device_type(nohost) local(l2)
-// OMP60: #pragma omp declare target device_type(nohost) local
+#pragma omp declare target device_type(nohost) local(l2) // omp60-warning 
{{'device_type(nohost)' is not yet supported with 'local' clause; treating as 
'device_type(any)'}}
+// OMP60: #pragma omp declare target local
 // OMP60: int l2;
 // OMP60: #pragma omp end declare target
 
 int l3;
 int a = 0;
-#pragma omp declare target local(l3) device_type(nohost) local(a)
-// OMP60: #pragma omp declare target device_type(nohost) local
+#pragma omp declare target local(l3) device_type(nohost) local(a) // 
omp60-warning 2 {{'device_type(nohost)' is not yet supported with 'local' 
clause; treating as 'device_type(any)'}}
+// OMP60: #pragma omp declare target local
 // OMP60: int l3;
 // OMP60: #pragma omp end declare target
-// OMP60: #pragma omp declare target device_type(nohost) local
+// OMP60: #pragma omp declare target local
 // OMP60: int a = 0;
 // OMP60: #pragma omp end declare target
 
diff --git a/clang/test/OpenMP/declare_target_local_codegen.cpp 
b/clang/test/OpenMP/declare_target_local_codegen.cpp
new file mode 100644
index 0000000000000..b82e8b3bba9ff
--- /dev/null
+++ b/clang/test/OpenMP/declare_target_local_codegen.cpp
@@ -0,0 +1,430 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex 
"__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" 
"pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
+// RUN: %clang_cc1 -verify=omp60 -fopenmp -fopenmp-version=60 -x c++ -triple 
powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o 
- | FileCheck %s --check-prefix=HOST
+// RUN: %clang_cc1 -verify=omp60 -fopenmp -fopenmp-version=60 -x c++ -triple 
powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s 
-o %t-host.bc
+// RUN: %clang_cc1 -verify=omp60 -fopenmp -fopenmp-version=60 -x c++ -triple 
amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device 
-fvisibility=protected -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck 
%s --check-prefix=DEVICE
+// RUN: %clang_cc1 -verify=omp60 -fopenmp -fopenmp-version=60 -x c++ -triple 
amdgcn-amd-amdhsa %s -fopenmp-is-target-device -fvisibility=protected 
-fopenmp-host-ir-file-path %t-host.bc -emit-pch -o %t
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -x c++ -triple 
amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device 
-fvisibility=protected -fopenmp-host-ir-file-path %t-host.bc -include-pch %t -o 
- | FileCheck %s --check-prefix=DEVICE
+
+#ifndef HEADER
+#define HEADER
+
+// ---------------------------------------------------------------------------
+// Explicit local clause (default device_type is 'any')
+// ---------------------------------------------------------------------------
+int local_scalar;
+#pragma omp declare target local(local_scalar)
+
+int local_array[64];
+#pragma omp declare target local(local_array)
+
+// ---------------------------------------------------------------------------
+// local + device_type(nohost)
+// ---------------------------------------------------------------------------
+int local_nohost_var;
+#pragma omp declare target local(local_nohost_var) device_type(nohost) // 
omp60-warning {{'device_type(nohost)' is not yet supported with 'local' clause; 
treating as 'device_type(any)'}}
+
+double local_nohost_arr[32];
+#pragma omp declare target local(local_nohost_arr) device_type(nohost) // 
omp60-warning {{'device_type(nohost)' is not yet supported with 'local' clause; 
treating as 'device_type(any)'}}
+
+// ---------------------------------------------------------------------------
+// Template with local variable
+// ---------------------------------------------------------------------------
+template <typename T>
+struct LocalStorage {
+  static T value;
+};
+
+template <typename T>
+T LocalStorage<T>::value;
+
+#pragma omp declare target local(LocalStorage<int>::value)
+#pragma omp declare target local(LocalStorage<double>::value)
+
+#pragma omp begin declare target
+template <typename T>
+T read_local_storage() {
+  return LocalStorage<T>::value;
+}
+#pragma omp end declare target
+
+// ---------------------------------------------------------------------------
+// Non-template static data member with local
+// ---------------------------------------------------------------------------
+struct PlainStruct {
+  static int s_member;
+};
+int PlainStruct::s_member;
+#pragma omp declare target local(PlainStruct::s_member)
+
+// ---------------------------------------------------------------------------
+// Initialized local variable
+// ---------------------------------------------------------------------------
+int local_init_var = 42;
+#pragma omp declare target local(local_init_var)
+
+// ---------------------------------------------------------------------------
+// Use local variables in a target region
+// ---------------------------------------------------------------------------
+int use_local_vars() {
+  int result = 0;
+  #pragma omp target map(from: result)
+  {
+    local_scalar = 42;
+    local_array[0] = 1;
+    LocalStorage<int>::value = 100;
+    result = local_scalar + local_array[0]
+             + read_local_storage<int>();
+  }
+  return result;
+}
+
+// ---------------------------------------------------------------------------
+// Use nohost local variables in a target region
+// ---------------------------------------------------------------------------
+int use_nohost_local_vars() {
+  int result = 0;
+  #pragma omp target map(from: result)
+  {
+    local_nohost_var = 7;
+    result = local_nohost_var;
+  }
+  return result;
+}
+
+// ---------------------------------------------------------------------------
+// Use static data member, initialized var, and static local in target region
+// ---------------------------------------------------------------------------
+int use_new_local_vars() {
+  int result = 0;
+  #pragma omp target map(from: result)
+  {
+    PlainStruct::s_member = 55;
+    local_init_var = 77;
+    result = PlainStruct::s_member + local_init_var;
+  }
+  return result;
+}
+
+#endif
+// HOST-LABEL: define {{[^@]+}}@_Z14use_local_varsv
+// HOST-SAME: () #[[ATTR0:[0-9]+]] {
+// HOST-NEXT:  entry:
+// HOST-NEXT:    [[RESULT:%.*]] = alloca i32, align 4
+// HOST-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 8
+// HOST-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8
+// HOST-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8
+// HOST-NEXT:    [[KERNEL_ARGS:%.*]] = alloca 
[[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// HOST-NEXT:    store i32 0, ptr [[RESULT]], align 4
+// HOST-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// HOST-NEXT:    store ptr [[RESULT]], ptr [[TMP0]], align 8
+// HOST-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// HOST-NEXT:    store ptr [[RESULT]], ptr [[TMP1]], align 8
+// HOST-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// HOST-NEXT:    store ptr null, ptr [[TMP2]], align 8
+// HOST-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// HOST-NEXT:    store ptr null, ptr [[TMP3]], align 8
+// HOST-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// HOST-NEXT:    store ptr null, ptr [[TMP4]], align 8
+// HOST-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// HOST-NEXT:    store ptr null, ptr [[TMP5]], align 8
+// HOST-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// HOST-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// HOST-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// HOST-NEXT:    store i32 4, ptr [[TMP8]], align 4
+// HOST-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// HOST-NEXT:    store i32 2, ptr [[TMP9]], align 4
+// HOST-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// HOST-NEXT:    store ptr [[TMP6]], ptr [[TMP10]], align 8
+// HOST-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// HOST-NEXT:    store ptr [[TMP7]], ptr [[TMP11]], align 8
+// HOST-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// HOST-NEXT:    store ptr @.offload_sizes, ptr [[TMP12]], align 8
+// HOST-NEXT:    [[TMP13:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// HOST-NEXT:    store ptr @.offload_maptypes, ptr [[TMP13]], align 8
+// HOST-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// HOST-NEXT:    store ptr null, ptr [[TMP14]], align 8
+// HOST-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// HOST-NEXT:    store ptr null, ptr [[TMP15]], align 8
+// HOST-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// HOST-NEXT:    store i64 0, ptr [[TMP16]], align 8
+// HOST-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// HOST-NEXT:    store i64 0, ptr [[TMP17]], align 8
+// HOST-NEXT:    [[TMP18:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// HOST-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP18]], align 4
+// HOST-NEXT:    [[TMP19:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// HOST-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP19]], align 4
+// HOST-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// HOST-NEXT:    store i32 0, ptr [[TMP20]], align 4
+// HOST-NEXT:    [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr 
@[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr 
@.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14use_local_varsv_l70.region_id, 
ptr [[KERNEL_ARGS]])
+// HOST-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
+// HOST-NEXT:    br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label 
[[OMP_OFFLOAD_CONT:%.*]]
+// HOST:       omp_offload.failed:
+// HOST-NEXT:    call void 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14use_local_varsv_l70(ptr 
[[RESULT]], ptr null) #[[ATTR2:[0-9]+]]
+// HOST-NEXT:    br label [[OMP_OFFLOAD_CONT]]
+// HOST:       omp_offload.cont:
+// HOST-NEXT:    [[TMP23:%.*]] = load i32, ptr [[RESULT]], align 4
+// HOST-NEXT:    ret i32 [[TMP23]]
+//
+//
+// HOST-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14use_local_varsv_l70
+// HOST-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[RESULT:%.*]], 
ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR1:[0-9]+]] {
+// HOST-NEXT:  entry:
+// HOST-NEXT:    [[RESULT_ADDR:%.*]] = alloca ptr, align 8
+// HOST-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
+// HOST-NEXT:    store ptr [[RESULT]], ptr [[RESULT_ADDR]], align 8
+// HOST-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// HOST-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[RESULT_ADDR]], align 8, 
!nonnull [[META8:![0-9]+]], !align [[META9:![0-9]+]]
+// HOST-NEXT:    store i32 42, ptr @local_scalar, align 4
+// HOST-NEXT:    store i32 1, ptr @local_array, align 4
+// HOST-NEXT:    store i32 100, ptr @_ZN12LocalStorageIiE5valueE, align 4
+// HOST-NEXT:    [[TMP1:%.*]] = load i32, ptr @local_scalar, align 4
+// HOST-NEXT:    [[TMP2:%.*]] = load i32, ptr @local_array, align 4
+// HOST-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
+// HOST-NEXT:    [[CALL:%.*]] = call noundef signext i32 
@_Z18read_local_storageIiET_v()
+// HOST-NEXT:    [[ADD1:%.*]] = add nsw i32 [[ADD]], [[CALL]]
+// HOST-NEXT:    store i32 [[ADD1]], ptr [[TMP0]], align 4
+// HOST-NEXT:    ret void
+//
+//
+// HOST-LABEL: define {{[^@]+}}@_Z18read_local_storageIiET_v
+// HOST-SAME: () #[[ATTR0]] comdat {
+// HOST-NEXT:  entry:
+// HOST-NEXT:    [[TMP0:%.*]] = load i32, ptr @_ZN12LocalStorageIiE5valueE, 
align 4
+// HOST-NEXT:    ret i32 [[TMP0]]
+//
+//
+// HOST-LABEL: define {{[^@]+}}@_Z21use_nohost_local_varsv
+// HOST-SAME: () #[[ATTR0]] {
+// HOST-NEXT:  entry:
+// HOST-NEXT:    [[RESULT:%.*]] = alloca i32, align 4
+// HOST-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 8
+// HOST-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8
+// HOST-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8
+// HOST-NEXT:    [[KERNEL_ARGS:%.*]] = alloca 
[[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// HOST-NEXT:    store i32 0, ptr [[RESULT]], align 4
+// HOST-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// HOST-NEXT:    store ptr [[RESULT]], ptr [[TMP0]], align 8
+// HOST-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// HOST-NEXT:    store ptr [[RESULT]], ptr [[TMP1]], align 8
+// HOST-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// HOST-NEXT:    store ptr null, ptr [[TMP2]], align 8
+// HOST-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// HOST-NEXT:    store ptr null, ptr [[TMP3]], align 8
+// HOST-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// HOST-NEXT:    store ptr null, ptr [[TMP4]], align 8
+// HOST-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// HOST-NEXT:    store ptr null, ptr [[TMP5]], align 8
+// HOST-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// HOST-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// HOST-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// HOST-NEXT:    store i32 4, ptr [[TMP8]], align 4
+// HOST-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// HOST-NEXT:    store i32 2, ptr [[TMP9]], align 4
+// HOST-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// HOST-NEXT:    store ptr [[TMP6]], ptr [[TMP10]], align 8
+// HOST-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// HOST-NEXT:    store ptr [[TMP7]], ptr [[TMP11]], align 8
+// HOST-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// HOST-NEXT:    store ptr @.offload_sizes.1, ptr [[TMP12]], align 8
+// HOST-NEXT:    [[TMP13:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// HOST-NEXT:    store ptr @.offload_maptypes.2, ptr [[TMP13]], align 8
+// HOST-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// HOST-NEXT:    store ptr null, ptr [[TMP14]], align 8
+// HOST-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// HOST-NEXT:    store ptr null, ptr [[TMP15]], align 8
+// HOST-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// HOST-NEXT:    store i64 0, ptr [[TMP16]], align 8
+// HOST-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// HOST-NEXT:    store i64 0, ptr [[TMP17]], align 8
+// HOST-NEXT:    [[TMP18:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// HOST-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP18]], align 4
+// HOST-NEXT:    [[TMP19:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// HOST-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP19]], align 4
+// HOST-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// HOST-NEXT:    store i32 0, ptr [[TMP20]], align 4
+// HOST-NEXT:    [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], 
i64 -1, i32 -1, i32 0, ptr 
@.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21use_nohost_local_varsv_l86.region_id,
 ptr [[KERNEL_ARGS]])
+// HOST-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
+// HOST-NEXT:    br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label 
[[OMP_OFFLOAD_CONT:%.*]]
+// HOST:       omp_offload.failed:
+// HOST-NEXT:    call void 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21use_nohost_local_varsv_l86(ptr 
[[RESULT]], ptr null) #[[ATTR2]]
+// HOST-NEXT:    br label [[OMP_OFFLOAD_CONT]]
+// HOST:       omp_offload.cont:
+// HOST-NEXT:    [[TMP23:%.*]] = load i32, ptr [[RESULT]], align 4
+// HOST-NEXT:    ret i32 [[TMP23]]
+//
+//
+// HOST-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21use_nohost_local_varsv_l86
+// HOST-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[RESULT:%.*]], 
ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR1]] {
+// HOST-NEXT:  entry:
+// HOST-NEXT:    [[RESULT_ADDR:%.*]] = alloca ptr, align 8
+// HOST-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
+// HOST-NEXT:    store ptr [[RESULT]], ptr [[RESULT_ADDR]], align 8
+// HOST-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// HOST-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[RESULT_ADDR]], align 8, 
!nonnull [[META8]], !align [[META9]]
+// HOST-NEXT:    store i32 7, ptr @local_nohost_var, align 4
+// HOST-NEXT:    [[TMP1:%.*]] = load i32, ptr @local_nohost_var, align 4
+// HOST-NEXT:    store i32 [[TMP1]], ptr [[TMP0]], align 4
+// HOST-NEXT:    ret void
+//
+//
+// HOST-LABEL: define {{[^@]+}}@_Z18use_new_local_varsv
+// HOST-SAME: () #[[ATTR0]] {
+// HOST-NEXT:  entry:
+// HOST-NEXT:    [[RESULT:%.*]] = alloca i32, align 4
+// HOST-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x ptr], align 8
+// HOST-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x ptr], align 8
+// HOST-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x ptr], align 8
+// HOST-NEXT:    [[KERNEL_ARGS:%.*]] = alloca 
[[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// HOST-NEXT:    store i32 0, ptr [[RESULT]], align 4
+// HOST-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// HOST-NEXT:    store ptr [[RESULT]], ptr [[TMP0]], align 8
+// HOST-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// HOST-NEXT:    store ptr [[RESULT]], ptr [[TMP1]], align 8
+// HOST-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// HOST-NEXT:    store ptr null, ptr [[TMP2]], align 8
+// HOST-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// HOST-NEXT:    store ptr null, ptr [[TMP3]], align 8
+// HOST-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// HOST-NEXT:    store ptr null, ptr [[TMP4]], align 8
+// HOST-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// HOST-NEXT:    store ptr null, ptr [[TMP5]], align 8
+// HOST-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// HOST-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// HOST-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// HOST-NEXT:    store i32 4, ptr [[TMP8]], align 4
+// HOST-NEXT:    [[TMP9:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// HOST-NEXT:    store i32 2, ptr [[TMP9]], align 4
+// HOST-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// HOST-NEXT:    store ptr [[TMP6]], ptr [[TMP10]], align 8
+// HOST-NEXT:    [[TMP11:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// HOST-NEXT:    store ptr [[TMP7]], ptr [[TMP11]], align 8
+// HOST-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// HOST-NEXT:    store ptr @.offload_sizes.3, ptr [[TMP12]], align 8
+// HOST-NEXT:    [[TMP13:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// HOST-NEXT:    store ptr @.offload_maptypes.4, ptr [[TMP13]], align 8
+// HOST-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// HOST-NEXT:    store ptr null, ptr [[TMP14]], align 8
+// HOST-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// HOST-NEXT:    store ptr null, ptr [[TMP15]], align 8
+// HOST-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// HOST-NEXT:    store i64 0, ptr [[TMP16]], align 8
+// HOST-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// HOST-NEXT:    store i64 0, ptr [[TMP17]], align 8
+// HOST-NEXT:    [[TMP18:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// HOST-NEXT:    store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP18]], align 4
+// HOST-NEXT:    [[TMP19:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// HOST-NEXT:    store [3 x i32] zeroinitializer, ptr [[TMP19]], align 4
+// HOST-NEXT:    [[TMP20:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// HOST-NEXT:    store i32 0, ptr [[TMP20]], align 4
+// HOST-NEXT:    [[TMP21:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], 
i64 -1, i32 -1, i32 0, ptr 
@.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18use_new_local_varsv_l99.region_id,
 ptr [[KERNEL_ARGS]])
+// HOST-NEXT:    [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
+// HOST-NEXT:    br i1 [[TMP22]], label [[OMP_OFFLOAD_FAILED:%.*]], label 
[[OMP_OFFLOAD_CONT:%.*]]
+// HOST:       omp_offload.failed:
+// HOST-NEXT:    call void 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18use_new_local_varsv_l99(ptr 
[[RESULT]], ptr null) #[[ATTR2]]
+// HOST-NEXT:    br label [[OMP_OFFLOAD_CONT]]
+// HOST:       omp_offload.cont:
+// HOST-NEXT:    [[TMP23:%.*]] = load i32, ptr [[RESULT]], align 4
+// HOST-NEXT:    ret i32 [[TMP23]]
+//
+//
+// HOST-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18use_new_local_varsv_l99
+// HOST-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[RESULT:%.*]], 
ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR1]] {
+// HOST-NEXT:  entry:
+// HOST-NEXT:    [[RESULT_ADDR:%.*]] = alloca ptr, align 8
+// HOST-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
+// HOST-NEXT:    store ptr [[RESULT]], ptr [[RESULT_ADDR]], align 8
+// HOST-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// HOST-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[RESULT_ADDR]], align 8, 
!nonnull [[META8]], !align [[META9]]
+// HOST-NEXT:    store i32 55, ptr @_ZN11PlainStruct8s_memberE, align 4
+// HOST-NEXT:    store i32 77, ptr @local_init_var, align 4
+// HOST-NEXT:    [[TMP1:%.*]] = load i32, ptr @_ZN11PlainStruct8s_memberE, 
align 4
+// HOST-NEXT:    [[TMP2:%.*]] = load i32, ptr @local_init_var, align 4
+// HOST-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
+// HOST-NEXT:    store i32 [[ADD]], ptr [[TMP0]], align 4
+// HOST-NEXT:    ret void
+//
+//
+// DEVICE-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14use_local_varsv_l70
+// DEVICE-SAME: (ptr noundef nonnull align 4 dereferenceable(4) 
[[RESULT:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
+// DEVICE-NEXT:  entry:
+// DEVICE-NEXT:    [[RESULT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// DEVICE-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// DEVICE-NEXT:    [[RESULT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[RESULT_ADDR]] to ptr
+// DEVICE-NEXT:    [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// DEVICE-NEXT:    store ptr [[RESULT]], ptr [[RESULT_ADDR_ASCAST]], align 8
+// DEVICE-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// DEVICE-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[RESULT_ADDR_ASCAST]], align 
8, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]]
+// DEVICE-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr 
addrspacecast (ptr addrspace(1) 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14use_local_varsv_l70_kernel_environment
 to ptr), ptr [[DYN_PTR]])
+// DEVICE-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
+// DEVICE-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], 
label [[WORKER_EXIT:%.*]]
+// DEVICE:       user_code.entry:
+// DEVICE-NEXT:    store i32 42, ptr addrspacecast (ptr addrspace(1) 
@local_scalar to ptr), align 4
+// DEVICE-NEXT:    store i32 1, ptr addrspacecast (ptr addrspace(1) 
@local_array to ptr), align 4
+// DEVICE-NEXT:    store i32 100, ptr addrspacecast (ptr addrspace(1) 
@_ZN12LocalStorageIiE5valueE to ptr), align 4
+// DEVICE-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr 
addrspace(1) @local_scalar to ptr), align 4
+// DEVICE-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspacecast (ptr 
addrspace(1) @local_array to ptr), align 4
+// DEVICE-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
+// DEVICE-NEXT:    [[CALL:%.*]] = call noundef i32 
@_Z18read_local_storageIiET_v() #[[ATTR2:[0-9]+]]
+// DEVICE-NEXT:    [[ADD1:%.*]] = add nsw i32 [[ADD]], [[CALL]]
+// DEVICE-NEXT:    store i32 [[ADD1]], ptr [[TMP0]], align 4
+// DEVICE-NEXT:    call void @__kmpc_target_deinit()
+// DEVICE-NEXT:    ret void
+// DEVICE:       worker.exit:
+// DEVICE-NEXT:    ret void
+//
+//
+// DEVICE-LABEL: define {{[^@]+}}@_Z18read_local_storageIiET_v
+// DEVICE-SAME: () #[[ATTR1:[0-9]+]] comdat {
+// DEVICE-NEXT:  entry:
+// DEVICE-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspacecast (ptr 
addrspace(1) @_ZN12LocalStorageIiE5valueE to ptr), align 4
+// DEVICE-NEXT:    ret i32 [[TMP0]]
+//
+//
+// DEVICE-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21use_nohost_local_varsv_l86
+// DEVICE-SAME: (ptr noundef nonnull align 4 dereferenceable(4) 
[[RESULT:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
+// DEVICE-NEXT:  entry:
+// DEVICE-NEXT:    [[RESULT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// DEVICE-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// DEVICE-NEXT:    [[RESULT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[RESULT_ADDR]] to ptr
+// DEVICE-NEXT:    [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// DEVICE-NEXT:    store ptr [[RESULT]], ptr [[RESULT_ADDR_ASCAST]], align 8
+// DEVICE-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// DEVICE-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[RESULT_ADDR_ASCAST]], align 
8, !nonnull [[META7]], !align [[META8]]
+// DEVICE-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr 
addrspacecast (ptr addrspace(1) 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z21use_nohost_local_varsv_l86_kernel_environment
 to ptr), ptr [[DYN_PTR]])
+// DEVICE-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
+// DEVICE-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], 
label [[WORKER_EXIT:%.*]]
+// DEVICE:       user_code.entry:
+// DEVICE-NEXT:    store i32 7, ptr addrspacecast (ptr addrspace(1) 
@local_nohost_var to ptr), align 4
+// DEVICE-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr 
addrspace(1) @local_nohost_var to ptr), align 4
+// DEVICE-NEXT:    store i32 [[TMP2]], ptr [[TMP0]], align 4
+// DEVICE-NEXT:    call void @__kmpc_target_deinit()
+// DEVICE-NEXT:    ret void
+// DEVICE:       worker.exit:
+// DEVICE-NEXT:    ret void
+//
+//
+// DEVICE-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18use_new_local_varsv_l99
+// DEVICE-SAME: (ptr noundef nonnull align 4 dereferenceable(4) 
[[RESULT:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
+// DEVICE-NEXT:  entry:
+// DEVICE-NEXT:    [[RESULT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// DEVICE-NEXT:    [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// DEVICE-NEXT:    [[RESULT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[RESULT_ADDR]] to ptr
+// DEVICE-NEXT:    [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// DEVICE-NEXT:    store ptr [[RESULT]], ptr [[RESULT_ADDR_ASCAST]], align 8
+// DEVICE-NEXT:    store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// DEVICE-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[RESULT_ADDR_ASCAST]], align 
8, !nonnull [[META7]], !align [[META8]]
+// DEVICE-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr 
addrspacecast (ptr addrspace(1) 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18use_new_local_varsv_l99_kernel_environment
 to ptr), ptr [[DYN_PTR]])
+// DEVICE-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
+// DEVICE-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], 
label [[WORKER_EXIT:%.*]]
+// DEVICE:       user_code.entry:
+// DEVICE-NEXT:    store i32 55, ptr addrspacecast (ptr addrspace(1) 
@_ZN11PlainStruct8s_memberE to ptr), align 4
+// DEVICE-NEXT:    store i32 77, ptr addrspacecast (ptr addrspace(1) 
@local_init_var to ptr), align 4
+// DEVICE-NEXT:    [[TMP2:%.*]] = load i32, ptr addrspacecast (ptr 
addrspace(1) @_ZN11PlainStruct8s_memberE to ptr), align 4
+// DEVICE-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspacecast (ptr 
addrspace(1) @local_init_var to ptr), align 4
+// DEVICE-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], [[TMP3]]
+// DEVICE-NEXT:    store i32 [[ADD]], ptr [[TMP0]], align 4
+// DEVICE-NEXT:    call void @__kmpc_target_deinit()
+// DEVICE-NEXT:    ret void
+// DEVICE:       worker.exit:
+// DEVICE-NEXT:    ret void
+//
diff --git a/clang/test/OpenMP/declare_target_local_usm_codegen.cpp 
b/clang/test/OpenMP/declare_target_local_usm_codegen.cpp
new file mode 100644
index 0000000000000..d97d6f409d265
--- /dev/null
+++ b/clang/test/OpenMP/declare_target_local_usm_codegen.cpp
@@ -0,0 +1,52 @@
+// Test that declare target local variables are NOT affected by
+// unified_shared_memory. Local variables always use direct access
+// (no offload entry, no _decl_tgt_ref_ptr) regardless of USM. For
+// comparison, enter variables with USM use pointer-reference indirection
+// when normally they would also be direct access.
+//
+// CHECK lines not auto-generated because they are specifically verifying
+// absence of ref ptr and offload entry for local variable and, by contrast,
+// presence of ref ptr and offload entry for enter variable.
+
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -x c++ -triple 
powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o 
- | FileCheck %s --check-prefix=HOST
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -x c++ -triple 
powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s 
-o %t-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -x c++ -triple 
amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device 
-fvisibility=protected -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck 
%s --check-prefix=DEVICE
+
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+#pragma omp requires unified_shared_memory
+
+int local_var;
+#pragma omp declare target local(local_var)
+
+int enter_var;
+#pragma omp declare target enter(enter_var)
+
+// local_var: direct access, no ref ptr, no offload entry
+// HOST-DAG: @local_var = global i32 0
+// HOST-NOT: @local_var_decl_tgt_ref_ptr
+
+// enter_var with USM: pointer-reference indirection
+// HOST-DAG: @enter_var_decl_tgt_ref_ptr = weak global ptr @enter_var
+// HOST-DAG: @.offloading.entry.enter_var_decl_tgt_ref_ptr = weak constant 
%struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr 
@enter_var_decl_tgt_ref_ptr, ptr @.offloading.entry_name{{.*}}, i64 8, i64 0, 
ptr null }, section "llvm_offload_entries"
+
+// Device: local_var is a direct global, enter_var uses ref ptr
+// DEVICE-DAG: @local_var = protected addrspace(1) global i32 0
+// DEVICE-NOT: @local_var_decl_tgt_ref_ptr
+// DEVICE-DAG: @enter_var_decl_tgt_ref_ptr = weak global ptr null
+
+int use_vars() {
+  int result = 0;
+#pragma omp target map(from: result)
+  {
+    local_var = 42;
+    enter_var = 10;
+    result = local_var + enter_var;
+  }
+  return result;
+}
+
+#endif
diff --git a/clang/test/OpenMP/declare_target_messages.cpp 
b/clang/test/OpenMP/declare_target_messages.cpp
index 9875bd95141fd..6fe477755dbe7 100644
--- a/clang/test/OpenMP/declare_target_messages.cpp
+++ b/clang/test/OpenMP/declare_target_messages.cpp
@@ -152,10 +152,14 @@ void func() {}
 // expected-note@+1 {{'func_local' defined here}}
 void func_local() {} 
 
-// dev60-warning@+3 {{'local' clause on 'declare_target' directive is not yet 
fully implemented; variable will be treated as 'enter'}}
 // omp60-error@+2 {{unexpected 'allocate' clause, only 'enter', 'link', 
'device_type', 'indirect' or 'local' clauses expected}}
 // expected-error@+1 {{function name is not allowed in 'local' clause}}
 #pragma omp declare target local(func_local) allocate(a)
+
+// omp60-error@+1 {{unexpected 'local' clause, only 'device_type', 'indirect' 
clauses expected}}
+#pragma omp begin declare target local
+int begin_local_var;
+#pragma omp end declare target
 #endif // _OPENMP
 
 void bar();
@@ -335,7 +339,6 @@ int y_enter_local;
 #pragma omp declare target local(y_enter_local)
 
 int y_local_enter;
-// dev60-warning@+1 {{'local' clause on 'declare_target' directive is not yet 
fully implemented; variable will be treated as 'enter'}}
 #pragma omp declare target local(y_local_enter)
 // expected-error@+1 {{'y_local_enter' must not appear in both clauses 'local' 
and 'enter'}}
 #pragma omp declare target enter(y_local_enter)
@@ -346,7 +349,6 @@ int y_link_local;
 #pragma omp declare target local(y_link_local)
 
 int y_local_link;
-// dev60-warning@+1 {{'local' clause on 'declare_target' directive is not yet 
fully implemented; variable will be treated as 'enter'}}
 #pragma omp declare target local(y_local_link)
 // expected-error@+1 {{'y_local_link' must not appear in both clauses 'local' 
and 'link'}}
 #pragma omp declare target link(y_local_link)
@@ -466,7 +468,6 @@ int MultiDevTy;
 #pragma omp declare target to(MultiDevTy) device_type(nohost)
 
 int counter = 0;
-// dev60-warning@+9 {{'local' clause on 'declare_target' directive is not yet 
fully implemented; variable will be treated as 'enter'}}
 // omp52-error@+8 {{unexpected 'local' clause, only 'enter', 'link', 
'device_type' or 'indirect' clauses expected}}
 // omp52-error@+7 {{expected at least one 'enter', 'link' or 'indirect' 
clause}}
 // omp51-error@+6 {{unexpected 'local' clause, only 'to', 'link', 
'device_type' or 'indirect' clauses expected}}
diff --git a/offload/test/offloading/declare_target_local.cpp 
b/offload/test/offloading/declare_target_local.cpp
new file mode 100644
index 0000000000000..9bff4ea38a407
--- /dev/null
+++ b/offload/test/offloading/declare_target_local.cpp
@@ -0,0 +1,40 @@
+// clang-format off
+// RUN: %libomptarget-compilexx-generic -fopenmp-version=60
+// RUN: %libomptarget-run-generic | %fcheck-generic
+// RUN: %libomptarget-compileoptxx-generic -fopenmp-version=60
+// RUN: %libomptarget-run-generic | %fcheck-generic
+// clang-format on
+
+// Sanity test for OpenMP 6.0 declare target 'local' clause.
+// Verify 'local' variable has device-local storage, that it has
+// correct initial value, persists across target regions, and is
+// independent from host copy.
+
+#include <cstdio>
+int local_var = 42;
+#pragma omp declare target local(local_var)
+
+int main() {
+  // Device should get the initializer value.
+  int init = -1;
+#pragma omp target map(from : init)
+  init = local_var;
+
+  // Device write should persist to a second region.
+  int persist = -1;
+#pragma omp target
+  local_var = 100;
+#pragma omp target map(from : persist)
+  persist = local_var;
+
+  // Host copy should not be affected by device write. Should
+  // retain original value.
+
+  // CHECK: PASS
+  if (init == 42 && persist == 100 && local_var == 42)
+    printf("PASS\n");
+  else
+    printf("FAIL init=%d persist=%d host=%d\n", init, persist, local_var);
+
+  return (init == 42 && persist == 100 && local_var == 42) ? 0 : 1;
+}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to