[clang] [llvm] [HIP] Support managed variables using the new driver (PR #123437)

2025-01-22 Thread Joseph Huber via cfe-commits

https://github.com/jhuber6 closed 
https://github.com/llvm/llvm-project/pull/123437
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [HIP] Support managed variables using the new driver (PR #123437)

2025-01-22 Thread Yaxun Liu via cfe-commits

https://github.com/yxsamliu approved this pull request.


https://github.com/llvm/llvm-project/pull/123437
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [HIP] Support managed variables using the new driver (PR #123437)

2025-01-22 Thread Joseph Huber via cfe-commits

https://github.com/jhuber6 updated 
https://github.com/llvm/llvm-project/pull/123437

>From 4414706b8ced9048a572fb78544a7e637c4946a0 Mon Sep 17 00:00:00 2001
From: Joseph Huber 
Date: Fri, 17 Jan 2025 19:56:18 -0600
Subject: [PATCH 1/3] [HIP] Support managed variables using the new driver

Summary:
Previously, managed variables didn't work in RDC mode with the new
driver because they were never registered. Registration was skipped
because the current offloading entry struct has no room for the second
pointer a managed variable needs. This patch fixes that by emitting a
struct that pairs the two variables and passing a pointer to that
struct through the entry's single pointer field.

In the future, a more extensible entry format would be nice, but that
can be done later.
---
 clang/lib/CodeGen/CGCUDANV.cpp                | 31 ++++++--
 clang/test/CodeGenCUDA/offloading-entries.cu  | 78 +++++++++----------
 clang/test/Driver/linker-wrapper-image.c      | 28 ++++---
 .../llvm/Frontend/Offloading/Utility.h        |  4 +
 .../Frontend/Offloading/OffloadWrapper.cpp    | 16 ++++
 llvm/lib/Frontend/Offloading/Utility.cpp      | 10 +++
 6 files changed, 109 insertions(+), 58 deletions(-)

diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index ae14d74f2d91511..0fc81491c40855d 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -1221,12 +1221,31 @@ void CGNVCUDARuntime::createOffloadingEntries() {
  ? static_cast(llvm::offloading::OffloadGlobalNormalized)
  : 0);
 if (I.Flags.getKind() == DeviceVarFlags::Variable) {
-  llvm::offloading::emitOffloadingEntry(
-  M, I.Var, getDeviceSideName(I.D), VarSize,
-  (I.Flags.isManaged() ? llvm::offloading::OffloadGlobalManagedEntry
-   : llvm::offloading::OffloadGlobalEntry) |
-  Flags,
-  /*Data=*/0, Section);
+  if (I.Flags.isManaged()) {
+assert(I.Var->getName().ends_with(".managed") &&
+   "HIP managed variables not transformed");
+
+// Create a struct to contain the two variables.
+auto *ManagedVar = M.getNamedGlobal(
+I.Var->getName().drop_back(StringRef(".managed").size()));
+llvm::Constant *StructData[] = {ManagedVar, I.Var};
+llvm::Constant *Initializer = llvm::ConstantStruct::get(
+llvm::offloading::getManagedTy(M), StructData);
+auto *Struct = new llvm::GlobalVariable(
+M, llvm::offloading::getManagedTy(M),
+/*IsConstant=*/true, llvm::GlobalValue::PrivateLinkage, 
Initializer,
+I.Var->getName());
+
+llvm::offloading::emitOffloadingEntry(
+M, Struct, getDeviceSideName(I.D), VarSize,
+llvm::offloading::OffloadGlobalManagedEntry | Flags,
+/*Data=*/static_cast(I.Var->getAlignment()), Section);
+  } else {
+llvm::offloading::emitOffloadingEntry(
+M, I.Var, getDeviceSideName(I.D), VarSize,
+llvm::offloading::OffloadGlobalEntry | Flags,
+/*Data=*/0, Section);
+  }
 } else if (I.Flags.getKind() == DeviceVarFlags::Surface) {
   llvm::offloading::emitOffloadingEntry(
   M, I.Var, getDeviceSideName(I.D), VarSize,
diff --git a/clang/test/CodeGenCUDA/offloading-entries.cu 
b/clang/test/CodeGenCUDA/offloading-entries.cu
index 259e3324e8ac94f..d46a25969e3ecd7 100644
--- a/clang/test/CodeGenCUDA/offloading-entries.cu
+++ b/clang/test/CodeGenCUDA/offloading-entries.cu
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --check-globals --global-value-regex ".offloading.entry.*"
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --check-globals --global-value-regex ".offloading.entry.*" "managed.*"
 // RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fgpu-rdc \
 // RUN:   --offload-new-driver -emit-llvm -o - -x cuda  %s | FileCheck \
 // RUN:   --check-prefix=CUDA %s
@@ -14,50 +14,68 @@
 
 #include "Inputs/cuda.h"
 
+#define __managed__ __attribute__((managed))
+
 //.
+// CUDA: @managed = global i32 undef, align 4
 // CUDA: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] 
c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
 // CUDA: @.offloading.entry._Z3foov = weak constant 
%struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr 
@.offloading.entry_name, i64 0, i32 0, i32 0 }, section 
"cuda_offloading_entries", align 1
 // CUDA: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] 
c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
 // CUDA: @.offloading.entry._Z6kernelv = weak constant 
%struct.__tgt_offload_entry { ptr @_Z21__device_stub__kernelv, ptr 
@.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section 
"cuda_offloading_entries", align 1
 // CUDA: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] 
c"var\00", section ".llvm.rodata.offloading", align 1
 // CUDA: @.offloading.entry.var = we

[clang] [llvm] [HIP] Support managed variables using the new driver (PR #123437)

2025-01-21 Thread Matt Arsenault via cfe-commits


@@ -1221,12 +1221,34 @@ void CGNVCUDARuntime::createOffloadingEntries() {
  ? static_cast(llvm::offloading::OffloadGlobalNormalized)
  : 0);
 if (I.Flags.getKind() == DeviceVarFlags::Variable) {
-  llvm::offloading::emitOffloadingEntry(
-  M, I.Var, getDeviceSideName(I.D), VarSize,
-  (I.Flags.isManaged() ? llvm::offloading::OffloadGlobalManagedEntry
-   : llvm::offloading::OffloadGlobalEntry) |
-  Flags,
-  /*Data=*/0, Section);
+  // TODO: Update the offloading entries struct to avoid this indirection.
+  if (I.Flags.isManaged()) {
+assert(I.Var->getName().ends_with(".managed") &&
+   "HIP managed variables not transformed");
+
+// Create a struct to contain the two variables.
+auto *ManagedVar = M.getNamedGlobal(
+I.Var->getName().drop_back(StringRef(".managed").size()));
+llvm::Constant *StructData[] = {ManagedVar, I.Var};
+llvm::Constant *Initializer = llvm::ConstantStruct::get(
+llvm::offloading::getManagedTy(M), StructData);
+auto *Struct = new llvm::GlobalVariable(
+M, llvm::offloading::getManagedTy(M),
+/*IsConstant=*/true, llvm::GlobalValue::PrivateLinkage, 
Initializer,
+I.Var->getName(), /*InsertBefore=*/nullptr,
+llvm::GlobalVariable::NotThreadLocal,
+CGM.getContext().getTargetAddressSpace(LangAS::Default));

arsenm wrote:

No, the address space should not come from the language. This should use 
`DL.getDefaultGlobalsAddressSpace()`.

https://github.com/llvm/llvm-project/pull/123437
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [HIP] Support managed variables using the new driver (PR #123437)

2025-01-21 Thread Joseph Huber via cfe-commits

https://github.com/jhuber6 updated 
https://github.com/llvm/llvm-project/pull/123437

>From bed6550941c0fafe2975288e49957a5a36895cf2 Mon Sep 17 00:00:00 2001
From: Joseph Huber 
Date: Fri, 17 Jan 2025 19:56:18 -0600
Subject: [PATCH 1/3] [HIP] Support managed variables using the new driver

Summary:
Previously, managed variables didn't work in RDC mode with the new
driver because they were never registered. Registration was skipped
because the current offloading entry struct has no room for the second
pointer a managed variable needs. This patch fixes that by emitting a
struct that pairs the two variables and passing a pointer to that
struct through the entry's single pointer field.

In the future, a more extensible entry format would be nice, but that
can be done later.
---
 clang/lib/CodeGen/CGCUDANV.cpp                | 31 ++++++--
 clang/test/CodeGenCUDA/offloading-entries.cu  | 78 +++++++++----------
 clang/test/Driver/linker-wrapper-image.c      | 28 ++++---
 .../llvm/Frontend/Offloading/Utility.h        |  4 +
 .../Frontend/Offloading/OffloadWrapper.cpp    | 16 ++++
 llvm/lib/Frontend/Offloading/Utility.cpp      | 10 +++
 6 files changed, 109 insertions(+), 58 deletions(-)

diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index ae14d74f2d9151..0fc81491c40855 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -1221,12 +1221,31 @@ void CGNVCUDARuntime::createOffloadingEntries() {
  ? static_cast(llvm::offloading::OffloadGlobalNormalized)
  : 0);
 if (I.Flags.getKind() == DeviceVarFlags::Variable) {
-  llvm::offloading::emitOffloadingEntry(
-  M, I.Var, getDeviceSideName(I.D), VarSize,
-  (I.Flags.isManaged() ? llvm::offloading::OffloadGlobalManagedEntry
-   : llvm::offloading::OffloadGlobalEntry) |
-  Flags,
-  /*Data=*/0, Section);
+  if (I.Flags.isManaged()) {
+assert(I.Var->getName().ends_with(".managed") &&
+   "HIP managed variables not transformed");
+
+// Create a struct to contain the two variables.
+auto *ManagedVar = M.getNamedGlobal(
+I.Var->getName().drop_back(StringRef(".managed").size()));
+llvm::Constant *StructData[] = {ManagedVar, I.Var};
+llvm::Constant *Initializer = llvm::ConstantStruct::get(
+llvm::offloading::getManagedTy(M), StructData);
+auto *Struct = new llvm::GlobalVariable(
+M, llvm::offloading::getManagedTy(M),
+/*IsConstant=*/true, llvm::GlobalValue::PrivateLinkage, 
Initializer,
+I.Var->getName());
+
+llvm::offloading::emitOffloadingEntry(
+M, Struct, getDeviceSideName(I.D), VarSize,
+llvm::offloading::OffloadGlobalManagedEntry | Flags,
+/*Data=*/static_cast(I.Var->getAlignment()), Section);
+  } else {
+llvm::offloading::emitOffloadingEntry(
+M, I.Var, getDeviceSideName(I.D), VarSize,
+llvm::offloading::OffloadGlobalEntry | Flags,
+/*Data=*/0, Section);
+  }
 } else if (I.Flags.getKind() == DeviceVarFlags::Surface) {
   llvm::offloading::emitOffloadingEntry(
   M, I.Var, getDeviceSideName(I.D), VarSize,
diff --git a/clang/test/CodeGenCUDA/offloading-entries.cu 
b/clang/test/CodeGenCUDA/offloading-entries.cu
index 259e3324e8ac94..d46a25969e3ecd 100644
--- a/clang/test/CodeGenCUDA/offloading-entries.cu
+++ b/clang/test/CodeGenCUDA/offloading-entries.cu
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --check-globals --global-value-regex ".offloading.entry.*"
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --check-globals --global-value-regex ".offloading.entry.*" "managed.*"
 // RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fgpu-rdc \
 // RUN:   --offload-new-driver -emit-llvm -o - -x cuda  %s | FileCheck \
 // RUN:   --check-prefix=CUDA %s
@@ -14,50 +14,68 @@
 
 #include "Inputs/cuda.h"
 
+#define __managed__ __attribute__((managed))
+
 //.
+// CUDA: @managed = global i32 undef, align 4
 // CUDA: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] 
c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
 // CUDA: @.offloading.entry._Z3foov = weak constant 
%struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr 
@.offloading.entry_name, i64 0, i32 0, i32 0 }, section 
"cuda_offloading_entries", align 1
 // CUDA: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] 
c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
 // CUDA: @.offloading.entry._Z6kernelv = weak constant 
%struct.__tgt_offload_entry { ptr @_Z21__device_stub__kernelv, ptr 
@.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section 
"cuda_offloading_entries", align 1
 // CUDA: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] 
c"var\00", section ".llvm.rodata.offloading", align 1
 // CUDA: @.offloading.entry.var = weak c

[clang] [llvm] [HIP] Support managed variables using the new driver (PR #123437)

2025-01-21 Thread Joseph Huber via cfe-commits


@@ -353,6 +353,16 @@ Function *createRegisterGlobalsFunction(Module &M, bool 
IsHIP,
   FunctionCallee RegVar = M.getOrInsertFunction(
   IsHIP ? "__hipRegisterVar" : "__cudaRegisterVar", RegVarTy);
 
+  // Get the __cudaRegisterSurface function declaration.

jhuber6 wrote:

Fixed

https://github.com/llvm/llvm-project/pull/123437
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [HIP] Support managed variables using the new driver (PR #123437)

2025-01-21 Thread Yaxun Liu via cfe-commits


@@ -353,6 +353,16 @@ Function *createRegisterGlobalsFunction(Module &M, bool 
IsHIP,
   FunctionCallee RegVar = M.getOrInsertFunction(
   IsHIP ? "__hipRegisterVar" : "__cudaRegisterVar", RegVarTy);
 
+  // Get the __cudaRegisterSurface function declaration.

yxsamliu wrote:

typo

https://github.com/llvm/llvm-project/pull/123437
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [HIP] Support managed variables using the new driver (PR #123437)

2025-01-21 Thread Joseph Huber via cfe-commits

jhuber6 wrote:

I'm currently working on redoing the offloading entry format. Can this land as 
an interim solution so I don't need to redo the work there?

https://github.com/llvm/llvm-project/pull/123437
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [HIP] Support managed variables using the new driver (PR #123437)

2025-01-21 Thread Matt Arsenault via cfe-commits

arsenm wrote:

> Wonder if I should try really hard to update the struct we use for this 
> before the branch in 8 days, since if it's default in CUDA now it'd break ABI 
> for a release.

Can you implement the new path under a switch? 

https://github.com/llvm/llvm-project/pull/123437
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [HIP] Support managed variables using the new driver (PR #123437)

2025-01-20 Thread Joseph Huber via cfe-commits

jhuber6 wrote:

Wonder if I should try really hard to update the struct we use for this before 
the branch in 8 days, since if it's default in CUDA now it'd break ABI for a 
release.

https://github.com/llvm/llvm-project/pull/123437
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [HIP] Support managed variables using the new driver (PR #123437)

2025-01-18 Thread Joseph Huber via cfe-commits

https://github.com/jhuber6 updated 
https://github.com/llvm/llvm-project/pull/123437

>From bed6550941c0fafe2975288e49957a5a36895cf2 Mon Sep 17 00:00:00 2001
From: Joseph Huber 
Date: Fri, 17 Jan 2025 19:56:18 -0600
Subject: [PATCH 1/2] [HIP] Support managed variables using the new driver

Summary:
Previously, managed variables didn't work in RDC mode with the new
driver because they were never registered. Registration was skipped
because the current offloading entry struct has no room for the second
pointer a managed variable needs. This patch fixes that by emitting a
struct that pairs the two variables and passing a pointer to that
struct through the entry's single pointer field.

In the future, a more extensible entry format would be nice, but that
can be done later.
---
 clang/lib/CodeGen/CGCUDANV.cpp                | 31 ++++++--
 clang/test/CodeGenCUDA/offloading-entries.cu  | 78 +++++++++----------
 clang/test/Driver/linker-wrapper-image.c      | 28 ++++---
 .../llvm/Frontend/Offloading/Utility.h        |  4 +
 .../Frontend/Offloading/OffloadWrapper.cpp    | 16 ++++
 llvm/lib/Frontend/Offloading/Utility.cpp      | 10 +++
 6 files changed, 109 insertions(+), 58 deletions(-)

diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index ae14d74f2d9151..0fc81491c40855 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -1221,12 +1221,31 @@ void CGNVCUDARuntime::createOffloadingEntries() {
  ? static_cast(llvm::offloading::OffloadGlobalNormalized)
  : 0);
 if (I.Flags.getKind() == DeviceVarFlags::Variable) {
-  llvm::offloading::emitOffloadingEntry(
-  M, I.Var, getDeviceSideName(I.D), VarSize,
-  (I.Flags.isManaged() ? llvm::offloading::OffloadGlobalManagedEntry
-   : llvm::offloading::OffloadGlobalEntry) |
-  Flags,
-  /*Data=*/0, Section);
+  if (I.Flags.isManaged()) {
+assert(I.Var->getName().ends_with(".managed") &&
+   "HIP managed variables not transformed");
+
+// Create a struct to contain the two variables.
+auto *ManagedVar = M.getNamedGlobal(
+I.Var->getName().drop_back(StringRef(".managed").size()));
+llvm::Constant *StructData[] = {ManagedVar, I.Var};
+llvm::Constant *Initializer = llvm::ConstantStruct::get(
+llvm::offloading::getManagedTy(M), StructData);
+auto *Struct = new llvm::GlobalVariable(
+M, llvm::offloading::getManagedTy(M),
+/*IsConstant=*/true, llvm::GlobalValue::PrivateLinkage, 
Initializer,
+I.Var->getName());
+
+llvm::offloading::emitOffloadingEntry(
+M, Struct, getDeviceSideName(I.D), VarSize,
+llvm::offloading::OffloadGlobalManagedEntry | Flags,
+/*Data=*/static_cast(I.Var->getAlignment()), Section);
+  } else {
+llvm::offloading::emitOffloadingEntry(
+M, I.Var, getDeviceSideName(I.D), VarSize,
+llvm::offloading::OffloadGlobalEntry | Flags,
+/*Data=*/0, Section);
+  }
 } else if (I.Flags.getKind() == DeviceVarFlags::Surface) {
   llvm::offloading::emitOffloadingEntry(
   M, I.Var, getDeviceSideName(I.D), VarSize,
diff --git a/clang/test/CodeGenCUDA/offloading-entries.cu 
b/clang/test/CodeGenCUDA/offloading-entries.cu
index 259e3324e8ac94..d46a25969e3ecd 100644
--- a/clang/test/CodeGenCUDA/offloading-entries.cu
+++ b/clang/test/CodeGenCUDA/offloading-entries.cu
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --check-globals --global-value-regex ".offloading.entry.*"
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --check-globals --global-value-regex ".offloading.entry.*" "managed.*"
 // RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fgpu-rdc \
 // RUN:   --offload-new-driver -emit-llvm -o - -x cuda  %s | FileCheck \
 // RUN:   --check-prefix=CUDA %s
@@ -14,50 +14,68 @@
 
 #include "Inputs/cuda.h"
 
+#define __managed__ __attribute__((managed))
+
 //.
+// CUDA: @managed = global i32 undef, align 4
 // CUDA: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] 
c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
 // CUDA: @.offloading.entry._Z3foov = weak constant 
%struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr 
@.offloading.entry_name, i64 0, i32 0, i32 0 }, section 
"cuda_offloading_entries", align 1
 // CUDA: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] 
c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
 // CUDA: @.offloading.entry._Z6kernelv = weak constant 
%struct.__tgt_offload_entry { ptr @_Z21__device_stub__kernelv, ptr 
@.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section 
"cuda_offloading_entries", align 1
 // CUDA: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] 
c"var\00", section ".llvm.rodata.offloading", align 1
 // CUDA: @.offloading.entry.var = weak c

[clang] [llvm] [HIP] Support managed variables using the new driver (PR #123437)

2025-01-18 Thread Matt Arsenault via cfe-commits


@@ -1221,12 +1221,31 @@ void CGNVCUDARuntime::createOffloadingEntries() {
  ? static_cast(llvm::offloading::OffloadGlobalNormalized)
  : 0);
 if (I.Flags.getKind() == DeviceVarFlags::Variable) {
-  llvm::offloading::emitOffloadingEntry(
-  M, I.Var, getDeviceSideName(I.D), VarSize,
-  (I.Flags.isManaged() ? llvm::offloading::OffloadGlobalManagedEntry
-   : llvm::offloading::OffloadGlobalEntry) |
-  Flags,
-  /*Data=*/0, Section);
+  if (I.Flags.isManaged()) {
+assert(I.Var->getName().ends_with(".managed") &&
+   "HIP managed variables not transformed");
+

arsenm wrote:

Add a TODO about the planned entry format change that would let this use a single struct.

https://github.com/llvm/llvm-project/pull/123437
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [HIP] Support managed variables using the new driver (PR #123437)

2025-01-17 Thread Matt Arsenault via cfe-commits


@@ -1221,12 +1221,31 @@ void CGNVCUDARuntime::createOffloadingEntries() {
  ? static_cast(llvm::offloading::OffloadGlobalNormalized)
  : 0);
 if (I.Flags.getKind() == DeviceVarFlags::Variable) {
-  llvm::offloading::emitOffloadingEntry(
-  M, I.Var, getDeviceSideName(I.D), VarSize,
-  (I.Flags.isManaged() ? llvm::offloading::OffloadGlobalManagedEntry
-   : llvm::offloading::OffloadGlobalEntry) |
-  Flags,
-  /*Data=*/0, Section);
+  if (I.Flags.isManaged()) {
+assert(I.Var->getName().ends_with(".managed") &&
+   "HIP managed variables not transformed");
+
+// Create a struct to contain the two variables.
+auto *ManagedVar = M.getNamedGlobal(
+I.Var->getName().drop_back(StringRef(".managed").size()));
+llvm::Constant *StructData[] = {ManagedVar, I.Var};
+llvm::Constant *Initializer = llvm::ConstantStruct::get(
+llvm::offloading::getManagedTy(M), StructData);
+auto *Struct = new llvm::GlobalVariable(
+M, llvm::offloading::getManagedTy(M),
+/*IsConstant=*/true, llvm::GlobalValue::PrivateLinkage, 
Initializer,

arsenm wrote:

This should set the address space explicitly, probably to the default globals address space.

https://github.com/llvm/llvm-project/pull/123437
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [HIP] Support managed variables using the new driver (PR #123437)

2025-01-17 Thread via cfe-commits

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: Joseph Huber (jhuber6)


Changes

Summary:
Previously, managed variables didn't work in RDC mode with the new
driver because they were never registered. Registration was skipped
because the current offloading entry struct has no room for the second
pointer a managed variable needs. This patch fixes that by emitting a
struct that pairs the two variables and passing a pointer to that
struct through the entry's single pointer field.

In the future, a more extensible entry format would be nice, but that
can be done later.


---

Patch is 22.56 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/123437.diff


6 Files Affected:

- (modified) clang/lib/CodeGen/CGCUDANV.cpp (+25-6) 
- (modified) clang/test/CodeGenCUDA/offloading-entries.cu (+36-42) 
- (modified) clang/test/Driver/linker-wrapper-image.c (+18-10) 
- (modified) llvm/include/llvm/Frontend/Offloading/Utility.h (+4) 
- (modified) llvm/lib/Frontend/Offloading/OffloadWrapper.cpp (+16) 
- (modified) llvm/lib/Frontend/Offloading/Utility.cpp (+10) 


```diff
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index ae14d74f2d9151..0fc81491c40855 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -1221,12 +1221,31 @@ void CGNVCUDARuntime::createOffloadingEntries() {
  ? static_cast(llvm::offloading::OffloadGlobalNormalized)
  : 0);
 if (I.Flags.getKind() == DeviceVarFlags::Variable) {
-  llvm::offloading::emitOffloadingEntry(
-  M, I.Var, getDeviceSideName(I.D), VarSize,
-  (I.Flags.isManaged() ? llvm::offloading::OffloadGlobalManagedEntry
-   : llvm::offloading::OffloadGlobalEntry) |
-  Flags,
-  /*Data=*/0, Section);
+  if (I.Flags.isManaged()) {
+assert(I.Var->getName().ends_with(".managed") &&
+   "HIP managed variables not transformed");
+
+// Create a struct to contain the two variables.
+auto *ManagedVar = M.getNamedGlobal(
+I.Var->getName().drop_back(StringRef(".managed").size()));
+llvm::Constant *StructData[] = {ManagedVar, I.Var};
+llvm::Constant *Initializer = llvm::ConstantStruct::get(
+llvm::offloading::getManagedTy(M), StructData);
+auto *Struct = new llvm::GlobalVariable(
+M, llvm::offloading::getManagedTy(M),
+/*IsConstant=*/true, llvm::GlobalValue::PrivateLinkage, 
Initializer,
+I.Var->getName());
+
+llvm::offloading::emitOffloadingEntry(
+M, Struct, getDeviceSideName(I.D), VarSize,
+llvm::offloading::OffloadGlobalManagedEntry | Flags,
+/*Data=*/static_cast(I.Var->getAlignment()), Section);
+  } else {
+llvm::offloading::emitOffloadingEntry(
+M, I.Var, getDeviceSideName(I.D), VarSize,
+llvm::offloading::OffloadGlobalEntry | Flags,
+/*Data=*/0, Section);
+  }
 } else if (I.Flags.getKind() == DeviceVarFlags::Surface) {
   llvm::offloading::emitOffloadingEntry(
   M, I.Var, getDeviceSideName(I.D), VarSize,
diff --git a/clang/test/CodeGenCUDA/offloading-entries.cu 
b/clang/test/CodeGenCUDA/offloading-entries.cu
index 259e3324e8ac94..d46a25969e3ecd 100644
--- a/clang/test/CodeGenCUDA/offloading-entries.cu
+++ b/clang/test/CodeGenCUDA/offloading-entries.cu
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --check-globals --global-value-regex ".offloading.entry.*"
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --check-globals --global-value-regex ".offloading.entry.*" "managed.*"
 // RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fgpu-rdc \
 // RUN:   --offload-new-driver -emit-llvm -o - -x cuda  %s | FileCheck \
 // RUN:   --check-prefix=CUDA %s
@@ -14,50 +14,68 @@
 
 #include "Inputs/cuda.h"
 
+#define __managed__ __attribute__((managed))
+
 //.
+// CUDA: @managed = global i32 undef, align 4
 // CUDA: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] 
c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
 // CUDA: @.offloading.entry._Z3foov = weak constant 
%struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr 
@.offloading.entry_name, i64 0, i32 0, i32 0 }, section 
"cuda_offloading_entries", align 1
 // CUDA: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] 
c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
 // CUDA: @.offloading.entry._Z6kernelv = weak constant 
%struct.__tgt_offload_entry { ptr @_Z21__device_stub__kernelv, ptr 
@.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section 
"cuda_offloading_entries", align 1
 // CUDA: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] 
c"var\00", section ".llvm.rodata.offloading", align 1
 // CUDA: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { 
ptr @var, ptr @.offloading.entry_nam

[clang] [llvm] [HIP] Support managed variables using the new driver (PR #123437)

2025-01-17 Thread Joseph Huber via cfe-commits

https://github.com/jhuber6 created 
https://github.com/llvm/llvm-project/pull/123437

Summary:
Previously, managed variables didn't work in RDC mode with the new
driver because they were never registered. Registration was skipped
because the current offloading entry struct has no room for the second
pointer a managed variable needs. This patch fixes that by emitting a
struct that pairs the two variables and passing a pointer to that
struct through the entry's single pointer field.

In the future, a more extensible entry format would be nice, but that
can be done later.


>From bed6550941c0fafe2975288e49957a5a36895cf2 Mon Sep 17 00:00:00 2001
From: Joseph Huber 
Date: Fri, 17 Jan 2025 19:56:18 -0600
Subject: [PATCH] [HIP] Support managed variables using the new driver

Summary:
Previously, managed variables didn't work in RDC mode with the new
driver because they were never registered. Registration was skipped
because the current offloading entry struct has no room for the second
pointer a managed variable needs. This patch fixes that by emitting a
struct that pairs the two variables and passing a pointer to that
struct through the entry's single pointer field.

In the future, a more extensible entry format would be nice, but that
can be done later.
---
 clang/lib/CodeGen/CGCUDANV.cpp                | 31 ++++++--
 clang/test/CodeGenCUDA/offloading-entries.cu  | 78 +++++++++----------
 clang/test/Driver/linker-wrapper-image.c      | 28 ++++---
 .../llvm/Frontend/Offloading/Utility.h        |  4 +
 .../Frontend/Offloading/OffloadWrapper.cpp    | 16 ++++
 llvm/lib/Frontend/Offloading/Utility.cpp      | 10 +++
 6 files changed, 109 insertions(+), 58 deletions(-)

diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index ae14d74f2d9151..0fc81491c40855 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -1221,12 +1221,31 @@ void CGNVCUDARuntime::createOffloadingEntries() {
  ? static_cast(llvm::offloading::OffloadGlobalNormalized)
  : 0);
 if (I.Flags.getKind() == DeviceVarFlags::Variable) {
-  llvm::offloading::emitOffloadingEntry(
-  M, I.Var, getDeviceSideName(I.D), VarSize,
-  (I.Flags.isManaged() ? llvm::offloading::OffloadGlobalManagedEntry
-   : llvm::offloading::OffloadGlobalEntry) |
-  Flags,
-  /*Data=*/0, Section);
+  if (I.Flags.isManaged()) {
+assert(I.Var->getName().ends_with(".managed") &&
+   "HIP managed variables not transformed");
+
+// Create a struct to contain the two variables.
+auto *ManagedVar = M.getNamedGlobal(
+I.Var->getName().drop_back(StringRef(".managed").size()));
+llvm::Constant *StructData[] = {ManagedVar, I.Var};
+llvm::Constant *Initializer = llvm::ConstantStruct::get(
+llvm::offloading::getManagedTy(M), StructData);
+auto *Struct = new llvm::GlobalVariable(
+M, llvm::offloading::getManagedTy(M),
+/*IsConstant=*/true, llvm::GlobalValue::PrivateLinkage, 
Initializer,
+I.Var->getName());
+
+llvm::offloading::emitOffloadingEntry(
+M, Struct, getDeviceSideName(I.D), VarSize,
+llvm::offloading::OffloadGlobalManagedEntry | Flags,
+/*Data=*/static_cast(I.Var->getAlignment()), Section);
+  } else {
+llvm::offloading::emitOffloadingEntry(
+M, I.Var, getDeviceSideName(I.D), VarSize,
+llvm::offloading::OffloadGlobalEntry | Flags,
+/*Data=*/0, Section);
+  }
 } else if (I.Flags.getKind() == DeviceVarFlags::Surface) {
   llvm::offloading::emitOffloadingEntry(
   M, I.Var, getDeviceSideName(I.D), VarSize,
diff --git a/clang/test/CodeGenCUDA/offloading-entries.cu 
b/clang/test/CodeGenCUDA/offloading-entries.cu
index 259e3324e8ac94..d46a25969e3ecd 100644
--- a/clang/test/CodeGenCUDA/offloading-entries.cu
+++ b/clang/test/CodeGenCUDA/offloading-entries.cu
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --check-globals --global-value-regex ".offloading.entry.*"
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --check-globals --global-value-regex ".offloading.entry.*" "managed.*"
 // RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fgpu-rdc \
 // RUN:   --offload-new-driver -emit-llvm -o - -x cuda  %s | FileCheck \
 // RUN:   --check-prefix=CUDA %s
@@ -14,50 +14,68 @@
 
 #include "Inputs/cuda.h"
 
+#define __managed__ __attribute__((managed))
+
 //.
+// CUDA: @managed = global i32 undef, align 4
 // CUDA: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] 
c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
 // CUDA: @.offloading.entry._Z3foov = weak constant 
%struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr 
@.offloading.entry_name, i64 0, i32 0, i32 0 }, section 
"cuda_offloading_entries", align 1
 // CUDA: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] 
c"_Z6kernelv\00", section ".llvm.rodata.offl