[clang] [llvm] [HIP] Support managed variables using the new driver (PR #123437)
https://github.com/jhuber6 closed
https://github.com/llvm/llvm-project/pull/123437
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HIP] Support managed variables using the new driver (PR #123437)
https://github.com/yxsamliu approved this pull request.
https://github.com/llvm/llvm-project/pull/123437
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HIP] Support managed variables using the new driver (PR #123437)
https://github.com/jhuber6 updated
https://github.com/llvm/llvm-project/pull/123437
>From 4414706b8ced9048a572fb78544a7e637c4946a0 Mon Sep 17 00:00:00 2001
From: Joseph Huber
Date: Fri, 17 Jan 2025 19:56:18 -0600
Subject: [PATCH 1/3] [HIP] Support managed variables using the new driver
Summary:
Previously, managed variables didn't work in rdc mode using the new
driver because we just didn't register them. This was previously ignored
because we didn't have enough space in the current struct format. This
patch amends that by just emitting a struct pair for the two variables
and using the single pointer.
In the future, a more extensible entry format would be nice, but that
can be done later.
---
clang/lib/CodeGen/CGCUDANV.cpp| 31 ++--
clang/test/CodeGenCUDA/offloading-entries.cu | 78 +--
clang/test/Driver/linker-wrapper-image.c | 28 ---
.../llvm/Frontend/Offloading/Utility.h| 4 +
.../Frontend/Offloading/OffloadWrapper.cpp| 16
llvm/lib/Frontend/Offloading/Utility.cpp | 10 +++
6 files changed, 109 insertions(+), 58 deletions(-)
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index ae14d74f2d91511..0fc81491c40855d 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -1221,12 +1221,31 @@ void CGNVCUDARuntime::createOffloadingEntries() {
? static_cast(llvm::offloading::OffloadGlobalNormalized)
: 0);
if (I.Flags.getKind() == DeviceVarFlags::Variable) {
- llvm::offloading::emitOffloadingEntry(
- M, I.Var, getDeviceSideName(I.D), VarSize,
- (I.Flags.isManaged() ? llvm::offloading::OffloadGlobalManagedEntry
- : llvm::offloading::OffloadGlobalEntry) |
- Flags,
- /*Data=*/0, Section);
+ if (I.Flags.isManaged()) {
+assert(I.Var->getName().ends_with(".managed") &&
+ "HIP managed variables not transformed");
+
+// Create a struct to contain the two variables.
+auto *ManagedVar = M.getNamedGlobal(
+I.Var->getName().drop_back(StringRef(".managed").size()));
+llvm::Constant *StructData[] = {ManagedVar, I.Var};
+llvm::Constant *Initializer = llvm::ConstantStruct::get(
+llvm::offloading::getManagedTy(M), StructData);
+auto *Struct = new llvm::GlobalVariable(
+M, llvm::offloading::getManagedTy(M),
+/*IsConstant=*/true, llvm::GlobalValue::PrivateLinkage,
Initializer,
+I.Var->getName());
+
+llvm::offloading::emitOffloadingEntry(
+M, Struct, getDeviceSideName(I.D), VarSize,
+llvm::offloading::OffloadGlobalManagedEntry | Flags,
+/*Data=*/static_cast(I.Var->getAlignment()), Section);
+ } else {
+llvm::offloading::emitOffloadingEntry(
+M, I.Var, getDeviceSideName(I.D), VarSize,
+llvm::offloading::OffloadGlobalEntry | Flags,
+/*Data=*/0, Section);
+ }
} else if (I.Flags.getKind() == DeviceVarFlags::Surface) {
llvm::offloading::emitOffloadingEntry(
M, I.Var, getDeviceSideName(I.D), VarSize,
diff --git a/clang/test/CodeGenCUDA/offloading-entries.cu
b/clang/test/CodeGenCUDA/offloading-entries.cu
index 259e3324e8ac94f..d46a25969e3ecd7 100644
--- a/clang/test/CodeGenCUDA/offloading-entries.cu
+++ b/clang/test/CodeGenCUDA/offloading-entries.cu
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --check-globals --global-value-regex ".offloading.entry.*"
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --check-globals --global-value-regex ".offloading.entry.*" "managed.*"
// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fgpu-rdc \
// RUN: --offload-new-driver -emit-llvm -o - -x cuda %s | FileCheck \
// RUN: --check-prefix=CUDA %s
@@ -14,50 +14,68 @@
#include "Inputs/cuda.h"
+#define __managed__ __attribute__((managed))
+
//.
+// CUDA: @managed = global i32 undef, align 4
// CUDA: @.offloading.entry_name = internal unnamed_addr constant [8 x i8]
c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
// CUDA: @.offloading.entry._Z3foov = weak constant
%struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr
@.offloading.entry_name, i64 0, i32 0, i32 0 }, section
"cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8]
c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
// CUDA: @.offloading.entry._Z6kernelv = weak constant
%struct.__tgt_offload_entry { ptr @_Z21__device_stub__kernelv, ptr
@.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section
"cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8]
c"var\00", section ".llvm.rodata.offloading", align 1
// CUDA: @.offloading.entry.var = we
[clang] [llvm] [HIP] Support managed variables using the new driver (PR #123437)
@@ -1221,12 +1221,34 @@ void CGNVCUDARuntime::createOffloadingEntries() {
? static_cast(llvm::offloading::OffloadGlobalNormalized)
: 0);
if (I.Flags.getKind() == DeviceVarFlags::Variable) {
- llvm::offloading::emitOffloadingEntry(
- M, I.Var, getDeviceSideName(I.D), VarSize,
- (I.Flags.isManaged() ? llvm::offloading::OffloadGlobalManagedEntry
- : llvm::offloading::OffloadGlobalEntry) |
- Flags,
- /*Data=*/0, Section);
+ // TODO: Update the offloading entries struct to avoid this indirection.
+ if (I.Flags.isManaged()) {
+assert(I.Var->getName().ends_with(".managed") &&
+ "HIP managed variables not transformed");
+
+// Create a struct to contain the two variables.
+auto *ManagedVar = M.getNamedGlobal(
+I.Var->getName().drop_back(StringRef(".managed").size()));
+llvm::Constant *StructData[] = {ManagedVar, I.Var};
+llvm::Constant *Initializer = llvm::ConstantStruct::get(
+llvm::offloading::getManagedTy(M), StructData);
+auto *Struct = new llvm::GlobalVariable(
+M, llvm::offloading::getManagedTy(M),
+/*IsConstant=*/true, llvm::GlobalValue::PrivateLinkage,
Initializer,
+I.Var->getName(), /*InsertBefore=*/nullptr,
+llvm::GlobalVariable::NotThreadLocal,
+CGM.getContext().getTargetAddressSpace(LangAS::Default));
arsenm wrote:
No, this is not coming from the language. This should use
DL.getDefaultGlobalsAddressSpace
https://github.com/llvm/llvm-project/pull/123437
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HIP] Support managed variables using the new driver (PR #123437)
https://github.com/jhuber6 updated
https://github.com/llvm/llvm-project/pull/123437
>From bed6550941c0fafe2975288e49957a5a36895cf2 Mon Sep 17 00:00:00 2001
From: Joseph Huber
Date: Fri, 17 Jan 2025 19:56:18 -0600
Subject: [PATCH 1/3] [HIP] Support managed variables using the new driver
Summary:
Previously, managed variables didn't work in rdc mode using the new
driver because we just didn't register them. This was previously ignored
because we didn't have enough space in the current struct format. This
patch amends that by just emitting a struct pair for the two variables
and using the single pointer.
In the future, a more extensible entry format would be nice, but that
can be done later.
---
clang/lib/CodeGen/CGCUDANV.cpp| 31 ++--
clang/test/CodeGenCUDA/offloading-entries.cu | 78 +--
clang/test/Driver/linker-wrapper-image.c | 28 ---
.../llvm/Frontend/Offloading/Utility.h| 4 +
.../Frontend/Offloading/OffloadWrapper.cpp| 16
llvm/lib/Frontend/Offloading/Utility.cpp | 10 +++
6 files changed, 109 insertions(+), 58 deletions(-)
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index ae14d74f2d9151..0fc81491c40855 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -1221,12 +1221,31 @@ void CGNVCUDARuntime::createOffloadingEntries() {
? static_cast(llvm::offloading::OffloadGlobalNormalized)
: 0);
if (I.Flags.getKind() == DeviceVarFlags::Variable) {
- llvm::offloading::emitOffloadingEntry(
- M, I.Var, getDeviceSideName(I.D), VarSize,
- (I.Flags.isManaged() ? llvm::offloading::OffloadGlobalManagedEntry
- : llvm::offloading::OffloadGlobalEntry) |
- Flags,
- /*Data=*/0, Section);
+ if (I.Flags.isManaged()) {
+assert(I.Var->getName().ends_with(".managed") &&
+ "HIP managed variables not transformed");
+
+// Create a struct to contain the two variables.
+auto *ManagedVar = M.getNamedGlobal(
+I.Var->getName().drop_back(StringRef(".managed").size()));
+llvm::Constant *StructData[] = {ManagedVar, I.Var};
+llvm::Constant *Initializer = llvm::ConstantStruct::get(
+llvm::offloading::getManagedTy(M), StructData);
+auto *Struct = new llvm::GlobalVariable(
+M, llvm::offloading::getManagedTy(M),
+/*IsConstant=*/true, llvm::GlobalValue::PrivateLinkage,
Initializer,
+I.Var->getName());
+
+llvm::offloading::emitOffloadingEntry(
+M, Struct, getDeviceSideName(I.D), VarSize,
+llvm::offloading::OffloadGlobalManagedEntry | Flags,
+/*Data=*/static_cast(I.Var->getAlignment()), Section);
+ } else {
+llvm::offloading::emitOffloadingEntry(
+M, I.Var, getDeviceSideName(I.D), VarSize,
+llvm::offloading::OffloadGlobalEntry | Flags,
+/*Data=*/0, Section);
+ }
} else if (I.Flags.getKind() == DeviceVarFlags::Surface) {
llvm::offloading::emitOffloadingEntry(
M, I.Var, getDeviceSideName(I.D), VarSize,
diff --git a/clang/test/CodeGenCUDA/offloading-entries.cu
b/clang/test/CodeGenCUDA/offloading-entries.cu
index 259e3324e8ac94..d46a25969e3ecd 100644
--- a/clang/test/CodeGenCUDA/offloading-entries.cu
+++ b/clang/test/CodeGenCUDA/offloading-entries.cu
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --check-globals --global-value-regex ".offloading.entry.*"
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --check-globals --global-value-regex ".offloading.entry.*" "managed.*"
// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fgpu-rdc \
// RUN: --offload-new-driver -emit-llvm -o - -x cuda %s | FileCheck \
// RUN: --check-prefix=CUDA %s
@@ -14,50 +14,68 @@
#include "Inputs/cuda.h"
+#define __managed__ __attribute__((managed))
+
//.
+// CUDA: @managed = global i32 undef, align 4
// CUDA: @.offloading.entry_name = internal unnamed_addr constant [8 x i8]
c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
// CUDA: @.offloading.entry._Z3foov = weak constant
%struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr
@.offloading.entry_name, i64 0, i32 0, i32 0 }, section
"cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8]
c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
// CUDA: @.offloading.entry._Z6kernelv = weak constant
%struct.__tgt_offload_entry { ptr @_Z21__device_stub__kernelv, ptr
@.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section
"cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8]
c"var\00", section ".llvm.rodata.offloading", align 1
// CUDA: @.offloading.entry.var = weak c
[clang] [llvm] [HIP] Support managed variables using the new driver (PR #123437)
@@ -353,6 +353,16 @@ Function *createRegisterGlobalsFunction(Module &M, bool IsHIP,
FunctionCallee RegVar = M.getOrInsertFunction(
IsHIP ? "__hipRegisterVar" : "__cudaRegisterVar", RegVarTy);
+ // Get the __cudaRegisterSurface function declaration.
jhuber6 wrote:
Fixed
https://github.com/llvm/llvm-project/pull/123437
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HIP] Support managed variables using the new driver (PR #123437)
@@ -353,6 +353,16 @@ Function *createRegisterGlobalsFunction(Module &M, bool IsHIP,
FunctionCallee RegVar = M.getOrInsertFunction(
IsHIP ? "__hipRegisterVar" : "__cudaRegisterVar", RegVarTy);
+ // Get the __cudaRegisterSurface function declaration.
yxsamliu wrote:
typo
https://github.com/llvm/llvm-project/pull/123437
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HIP] Support managed variables using the new driver (PR #123437)
jhuber6 wrote:
I'm currently working on redoing the offloading entry format. Can this land as
an interim solution so I don't need to redo the work there?
https://github.com/llvm/llvm-project/pull/123437
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HIP] Support managed variables using the new driver (PR #123437)
arsenm wrote:
> Wonder if I should try really hard to update the struct we use for this
> before the branch in 8 days, since if it's default in CUDA now it'd break ABI
> for a release.
Can you implement the new path under a switch?
https://github.com/llvm/llvm-project/pull/123437
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HIP] Support managed variables using the new driver (PR #123437)
jhuber6 wrote:
Wonder if I should try really hard to update the struct we use for this before
the branch in 8 days, since if it's default in CUDA now it'd break ABI for a
release.
https://github.com/llvm/llvm-project/pull/123437
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HIP] Support managed variables using the new driver (PR #123437)
https://github.com/jhuber6 updated
https://github.com/llvm/llvm-project/pull/123437
>From bed6550941c0fafe2975288e49957a5a36895cf2 Mon Sep 17 00:00:00 2001
From: Joseph Huber
Date: Fri, 17 Jan 2025 19:56:18 -0600
Subject: [PATCH 1/2] [HIP] Support managed variables using the new driver
Summary:
Previously, managed variables didn't work in rdc mode using the new
driver because we just didn't register them. This was previously ignored
because we didn't have enough space in the current struct format. This
patch amends that by just emitting a struct pair for the two variables
and using the single pointer.
In the future, a more extensible entry format would be nice, but that
can be done later.
---
clang/lib/CodeGen/CGCUDANV.cpp| 31 ++--
clang/test/CodeGenCUDA/offloading-entries.cu | 78 +--
clang/test/Driver/linker-wrapper-image.c | 28 ---
.../llvm/Frontend/Offloading/Utility.h| 4 +
.../Frontend/Offloading/OffloadWrapper.cpp| 16
llvm/lib/Frontend/Offloading/Utility.cpp | 10 +++
6 files changed, 109 insertions(+), 58 deletions(-)
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index ae14d74f2d9151..0fc81491c40855 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -1221,12 +1221,31 @@ void CGNVCUDARuntime::createOffloadingEntries() {
? static_cast(llvm::offloading::OffloadGlobalNormalized)
: 0);
if (I.Flags.getKind() == DeviceVarFlags::Variable) {
- llvm::offloading::emitOffloadingEntry(
- M, I.Var, getDeviceSideName(I.D), VarSize,
- (I.Flags.isManaged() ? llvm::offloading::OffloadGlobalManagedEntry
- : llvm::offloading::OffloadGlobalEntry) |
- Flags,
- /*Data=*/0, Section);
+ if (I.Flags.isManaged()) {
+assert(I.Var->getName().ends_with(".managed") &&
+ "HIP managed variables not transformed");
+
+// Create a struct to contain the two variables.
+auto *ManagedVar = M.getNamedGlobal(
+I.Var->getName().drop_back(StringRef(".managed").size()));
+llvm::Constant *StructData[] = {ManagedVar, I.Var};
+llvm::Constant *Initializer = llvm::ConstantStruct::get(
+llvm::offloading::getManagedTy(M), StructData);
+auto *Struct = new llvm::GlobalVariable(
+M, llvm::offloading::getManagedTy(M),
+/*IsConstant=*/true, llvm::GlobalValue::PrivateLinkage,
Initializer,
+I.Var->getName());
+
+llvm::offloading::emitOffloadingEntry(
+M, Struct, getDeviceSideName(I.D), VarSize,
+llvm::offloading::OffloadGlobalManagedEntry | Flags,
+/*Data=*/static_cast(I.Var->getAlignment()), Section);
+ } else {
+llvm::offloading::emitOffloadingEntry(
+M, I.Var, getDeviceSideName(I.D), VarSize,
+llvm::offloading::OffloadGlobalEntry | Flags,
+/*Data=*/0, Section);
+ }
} else if (I.Flags.getKind() == DeviceVarFlags::Surface) {
llvm::offloading::emitOffloadingEntry(
M, I.Var, getDeviceSideName(I.D), VarSize,
diff --git a/clang/test/CodeGenCUDA/offloading-entries.cu
b/clang/test/CodeGenCUDA/offloading-entries.cu
index 259e3324e8ac94..d46a25969e3ecd 100644
--- a/clang/test/CodeGenCUDA/offloading-entries.cu
+++ b/clang/test/CodeGenCUDA/offloading-entries.cu
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --check-globals --global-value-regex ".offloading.entry.*"
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --check-globals --global-value-regex ".offloading.entry.*" "managed.*"
// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fgpu-rdc \
// RUN: --offload-new-driver -emit-llvm -o - -x cuda %s | FileCheck \
// RUN: --check-prefix=CUDA %s
@@ -14,50 +14,68 @@
#include "Inputs/cuda.h"
+#define __managed__ __attribute__((managed))
+
//.
+// CUDA: @managed = global i32 undef, align 4
// CUDA: @.offloading.entry_name = internal unnamed_addr constant [8 x i8]
c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
// CUDA: @.offloading.entry._Z3foov = weak constant
%struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr
@.offloading.entry_name, i64 0, i32 0, i32 0 }, section
"cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8]
c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
// CUDA: @.offloading.entry._Z6kernelv = weak constant
%struct.__tgt_offload_entry { ptr @_Z21__device_stub__kernelv, ptr
@.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section
"cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8]
c"var\00", section ".llvm.rodata.offloading", align 1
// CUDA: @.offloading.entry.var = weak c
[clang] [llvm] [HIP] Support managed variables using the new driver (PR #123437)
@@ -1221,12 +1221,31 @@ void CGNVCUDARuntime::createOffloadingEntries() {
? static_cast(llvm::offloading::OffloadGlobalNormalized)
: 0);
if (I.Flags.getKind() == DeviceVarFlags::Variable) {
- llvm::offloading::emitOffloadingEntry(
- M, I.Var, getDeviceSideName(I.D), VarSize,
- (I.Flags.isManaged() ? llvm::offloading::OffloadGlobalManagedEntry
- : llvm::offloading::OffloadGlobalEntry) |
- Flags,
- /*Data=*/0, Section);
+ if (I.Flags.isManaged()) {
+assert(I.Var->getName().ends_with(".managed") &&
+ "HIP managed variables not transformed");
+
arsenm wrote:
Add a todo about the format change to use one struct
https://github.com/llvm/llvm-project/pull/123437
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HIP] Support managed variables using the new driver (PR #123437)
@@ -1221,12 +1221,31 @@ void CGNVCUDARuntime::createOffloadingEntries() {
? static_cast(llvm::offloading::OffloadGlobalNormalized)
: 0);
if (I.Flags.getKind() == DeviceVarFlags::Variable) {
- llvm::offloading::emitOffloadingEntry(
- M, I.Var, getDeviceSideName(I.D), VarSize,
- (I.Flags.isManaged() ? llvm::offloading::OffloadGlobalManagedEntry
- : llvm::offloading::OffloadGlobalEntry) |
- Flags,
- /*Data=*/0, Section);
+ if (I.Flags.isManaged()) {
+assert(I.Var->getName().ends_with(".managed") &&
+ "HIP managed variables not transformed");
+
+// Create a struct to contain the two variables.
+auto *ManagedVar = M.getNamedGlobal(
+I.Var->getName().drop_back(StringRef(".managed").size()));
+llvm::Constant *StructData[] = {ManagedVar, I.Var};
+llvm::Constant *Initializer = llvm::ConstantStruct::get(
+llvm::offloading::getManagedTy(M), StructData);
+auto *Struct = new llvm::GlobalVariable(
+M, llvm::offloading::getManagedTy(M),
+/*IsConstant=*/true, llvm::GlobalValue::PrivateLinkage,
Initializer,
arsenm wrote:
Should set the address space, probably to the default globals one
https://github.com/llvm/llvm-project/pull/123437
___
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [HIP] Support managed variables using the new driver (PR #123437)
llvmbot wrote:
@llvm/pr-subscribers-clang
Author: Joseph Huber (jhuber6)
Changes
Summary:
Previously, managed variables didn't work in rdc mode using the new
driver because we just didn't register them. This was previously ignored
because we didn't have enough space in the current struct format. This
patch amends that by just emitting a struct pair for the two variables
and using the single pointer.
In the future, a more extensible entry format would be nice, but that
can be done later.
---
Patch is 22.56 KiB, truncated to 20.00 KiB below, full version:
https://github.com/llvm/llvm-project/pull/123437.diff
6 Files Affected:
- (modified) clang/lib/CodeGen/CGCUDANV.cpp (+25-6)
- (modified) clang/test/CodeGenCUDA/offloading-entries.cu (+36-42)
- (modified) clang/test/Driver/linker-wrapper-image.c (+18-10)
- (modified) llvm/include/llvm/Frontend/Offloading/Utility.h (+4)
- (modified) llvm/lib/Frontend/Offloading/OffloadWrapper.cpp (+16)
- (modified) llvm/lib/Frontend/Offloading/Utility.cpp (+10)
```diff
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index ae14d74f2d9151..0fc81491c40855 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -1221,12 +1221,31 @@ void CGNVCUDARuntime::createOffloadingEntries() {
? static_cast(llvm::offloading::OffloadGlobalNormalized)
: 0);
if (I.Flags.getKind() == DeviceVarFlags::Variable) {
- llvm::offloading::emitOffloadingEntry(
- M, I.Var, getDeviceSideName(I.D), VarSize,
- (I.Flags.isManaged() ? llvm::offloading::OffloadGlobalManagedEntry
- : llvm::offloading::OffloadGlobalEntry) |
- Flags,
- /*Data=*/0, Section);
+ if (I.Flags.isManaged()) {
+assert(I.Var->getName().ends_with(".managed") &&
+ "HIP managed variables not transformed");
+
+// Create a struct to contain the two variables.
+auto *ManagedVar = M.getNamedGlobal(
+I.Var->getName().drop_back(StringRef(".managed").size()));
+llvm::Constant *StructData[] = {ManagedVar, I.Var};
+llvm::Constant *Initializer = llvm::ConstantStruct::get(
+llvm::offloading::getManagedTy(M), StructData);
+auto *Struct = new llvm::GlobalVariable(
+M, llvm::offloading::getManagedTy(M),
+/*IsConstant=*/true, llvm::GlobalValue::PrivateLinkage,
Initializer,
+I.Var->getName());
+
+llvm::offloading::emitOffloadingEntry(
+M, Struct, getDeviceSideName(I.D), VarSize,
+llvm::offloading::OffloadGlobalManagedEntry | Flags,
+/*Data=*/static_cast(I.Var->getAlignment()), Section);
+ } else {
+llvm::offloading::emitOffloadingEntry(
+M, I.Var, getDeviceSideName(I.D), VarSize,
+llvm::offloading::OffloadGlobalEntry | Flags,
+/*Data=*/0, Section);
+ }
} else if (I.Flags.getKind() == DeviceVarFlags::Surface) {
llvm::offloading::emitOffloadingEntry(
M, I.Var, getDeviceSideName(I.D), VarSize,
diff --git a/clang/test/CodeGenCUDA/offloading-entries.cu
b/clang/test/CodeGenCUDA/offloading-entries.cu
index 259e3324e8ac94..d46a25969e3ecd 100644
--- a/clang/test/CodeGenCUDA/offloading-entries.cu
+++ b/clang/test/CodeGenCUDA/offloading-entries.cu
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --check-globals --global-value-regex ".offloading.entry.*"
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --check-globals --global-value-regex ".offloading.entry.*" "managed.*"
// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fgpu-rdc \
// RUN: --offload-new-driver -emit-llvm -o - -x cuda %s | FileCheck \
// RUN: --check-prefix=CUDA %s
@@ -14,50 +14,68 @@
#include "Inputs/cuda.h"
+#define __managed__ __attribute__((managed))
+
//.
+// CUDA: @managed = global i32 undef, align 4
// CUDA: @.offloading.entry_name = internal unnamed_addr constant [8 x i8]
c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
// CUDA: @.offloading.entry._Z3foov = weak constant
%struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr
@.offloading.entry_name, i64 0, i32 0, i32 0 }, section
"cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8]
c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
// CUDA: @.offloading.entry._Z6kernelv = weak constant
%struct.__tgt_offload_entry { ptr @_Z21__device_stub__kernelv, ptr
@.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section
"cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8]
c"var\00", section ".llvm.rodata.offloading", align 1
// CUDA: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry {
ptr @var, ptr @.offloading.entry_nam
[clang] [llvm] [HIP] Support managed variables using the new driver (PR #123437)
https://github.com/jhuber6 created
https://github.com/llvm/llvm-project/pull/123437
Summary:
Previously, managed variables didn't work in rdc mode using the new
driver because we just didn't register them. This was previously ignored
because we didn't have enough space in the current struct format. This
patch amends that by just emitting a struct pair for the two variables
and using the single pointer.
In the future, a more extensible entry format would be nice, but that
can be done later.
>From bed6550941c0fafe2975288e49957a5a36895cf2 Mon Sep 17 00:00:00 2001
From: Joseph Huber
Date: Fri, 17 Jan 2025 19:56:18 -0600
Subject: [PATCH] [HIP] Support managed variables using the new driver
Summary:
Previously, managed variables didn't work in rdc mode using the new
driver because we just didn't register them. This was previously ignored
because we didn't have enough space in the current struct format. This
patch amends that by just emitting a struct pair for the two variables
and using the single pointer.
In the future, a more extensible entry format would be nice, but that
can be done later.
---
clang/lib/CodeGen/CGCUDANV.cpp| 31 ++--
clang/test/CodeGenCUDA/offloading-entries.cu | 78 +--
clang/test/Driver/linker-wrapper-image.c | 28 ---
.../llvm/Frontend/Offloading/Utility.h| 4 +
.../Frontend/Offloading/OffloadWrapper.cpp| 16
llvm/lib/Frontend/Offloading/Utility.cpp | 10 +++
6 files changed, 109 insertions(+), 58 deletions(-)
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index ae14d74f2d9151..0fc81491c40855 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -1221,12 +1221,31 @@ void CGNVCUDARuntime::createOffloadingEntries() {
? static_cast(llvm::offloading::OffloadGlobalNormalized)
: 0);
if (I.Flags.getKind() == DeviceVarFlags::Variable) {
- llvm::offloading::emitOffloadingEntry(
- M, I.Var, getDeviceSideName(I.D), VarSize,
- (I.Flags.isManaged() ? llvm::offloading::OffloadGlobalManagedEntry
- : llvm::offloading::OffloadGlobalEntry) |
- Flags,
- /*Data=*/0, Section);
+ if (I.Flags.isManaged()) {
+assert(I.Var->getName().ends_with(".managed") &&
+ "HIP managed variables not transformed");
+
+// Create a struct to contain the two variables.
+auto *ManagedVar = M.getNamedGlobal(
+I.Var->getName().drop_back(StringRef(".managed").size()));
+llvm::Constant *StructData[] = {ManagedVar, I.Var};
+llvm::Constant *Initializer = llvm::ConstantStruct::get(
+llvm::offloading::getManagedTy(M), StructData);
+auto *Struct = new llvm::GlobalVariable(
+M, llvm::offloading::getManagedTy(M),
+/*IsConstant=*/true, llvm::GlobalValue::PrivateLinkage,
Initializer,
+I.Var->getName());
+
+llvm::offloading::emitOffloadingEntry(
+M, Struct, getDeviceSideName(I.D), VarSize,
+llvm::offloading::OffloadGlobalManagedEntry | Flags,
+/*Data=*/static_cast(I.Var->getAlignment()), Section);
+ } else {
+llvm::offloading::emitOffloadingEntry(
+M, I.Var, getDeviceSideName(I.D), VarSize,
+llvm::offloading::OffloadGlobalEntry | Flags,
+/*Data=*/0, Section);
+ }
} else if (I.Flags.getKind() == DeviceVarFlags::Surface) {
llvm::offloading::emitOffloadingEntry(
M, I.Var, getDeviceSideName(I.D), VarSize,
diff --git a/clang/test/CodeGenCUDA/offloading-entries.cu
b/clang/test/CodeGenCUDA/offloading-entries.cu
index 259e3324e8ac94..d46a25969e3ecd 100644
--- a/clang/test/CodeGenCUDA/offloading-entries.cu
+++ b/clang/test/CodeGenCUDA/offloading-entries.cu
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --check-globals --global-value-regex ".offloading.entry.*"
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
UTC_ARGS: --check-globals --global-value-regex ".offloading.entry.*" "managed.*"
// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-linux-gnu -fgpu-rdc \
// RUN: --offload-new-driver -emit-llvm -o - -x cuda %s | FileCheck \
// RUN: --check-prefix=CUDA %s
@@ -14,50 +14,68 @@
#include "Inputs/cuda.h"
+#define __managed__ __attribute__((managed))
+
//.
+// CUDA: @managed = global i32 undef, align 4
// CUDA: @.offloading.entry_name = internal unnamed_addr constant [8 x i8]
c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
// CUDA: @.offloading.entry._Z3foov = weak constant
%struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr
@.offloading.entry_name, i64 0, i32 0, i32 0 }, section
"cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8]
c"_Z6kernelv\00", section ".llvm.rodata.offl
