gtbercea created this revision.
Herald added a subscriber: jholewinski.

Since OpenMP and CUDA share the same toolchain we need to disable:

- the lowering of variables to shared memory in the LLVM NVPTX backend
- the emission of the shared depot
- the emission of shared stack pointers

when compiling:

- CUDA programs
- OpenMP programs that do not require data sharing.


Repository:
  rL LLVM

https://reviews.llvm.org/D40451

Files:
  lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
  test/OpenMP/nvptx_data_sharing.cpp


Index: test/OpenMP/nvptx_data_sharing.cpp
===================================================================
--- test/OpenMP/nvptx_data_sharing.cpp
+++ test/OpenMP/nvptx_data_sharing.cpp
@@ -22,15 +22,15 @@
 
 /// ========= In the worker function ========= ///
 
-// CK1: define internal void 
@__omp_offloading_{{.*}}test_ds{{.*}}worker(){{.*}}{
+// CK1: define internal void @__omp_offloading_{{.*}}test_ds{{.*}}worker() #0
 // CK1: [[SHAREDARGS:%.+]] = alloca i8**
 // CK1: call i1 @__kmpc_kernel_parallel(i8** %work_fn, i8*** [[SHAREDARGS]])
 // CK1: [[SHARGSTMP:%.+]] = load i8**, i8*** [[SHAREDARGS]]
 // CK1: call void @__omp_outlined___wrapper{{.*}}({{.*}}, i8** [[SHARGSTMP]])
 
 /// ========= In the kernel function ========= ///
 
-// CK1: {{.*}}define void @__omp_offloading{{.*}}test_ds{{.*}}()
+// CK1: {{.*}}define void @__omp_offloading{{.*}}test_ds{{.*}}() #1
 // CK1: [[SHAREDARGS1:%.+]] = alloca i8**
 // CK1: call void @__kmpc_kernel_prepare_parallel({{.*}}, i8*** 
[[SHAREDARGS1]], i32 1)
 // CK1: [[SHARGSTMP1:%.+]] = load i8**, i8*** [[SHAREDARGS1]]
@@ -40,13 +40,17 @@
 
 /// ========= In the data sharing wrapper function ========= ///
 
-// CK1: {{.*}}define internal void @__omp_outlined___wrapper({{.*}}i8**){{.*}}{
+// CK1: {{.*}}define internal void @__omp_outlined___wrapper({{.*}}i8**) #0
 // CK1: [[SHAREDARGS2:%.+]] = alloca i8**
 // CK1: store i8** %2, i8*** [[SHAREDARGS2]]
 // CK1: [[SHARGSTMP3:%.+]] = load i8**, i8*** [[SHAREDARGS2]]
 // CK1: [[SHARGSTMP4:%.+]] = getelementptr inbounds i8*, i8** [[SHARGSTMP3]]
 // CK1: [[SHARGSTMP5:%.+]] = bitcast i8** [[SHARGSTMP4]] to i32**
 // CK1: [[SHARGSTMP6:%.+]] = load i32*, i32** [[SHARGSTMP5]]
 // CK1: call void @__omp_outlined__({{.*}}, i32* [[SHARGSTMP6]])
 
+/// ========= Attribute must contain
+// CK1-NOT: attributes #0 = { {{.*}}"has-nvptx-shared-depot"{{.*}} }
+// CK1: attributes #1 = { {{.*}}"has-nvptx-shared-depot"{{.*}} }
+
 #endif
Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -934,6 +934,8 @@
     llvm::Value *ID = Bld.CreateBitOrPointerCast(WFn, CGM.Int8PtrTy);
 
     if (!CapturedVars.empty()) {
+      // There's somehting to share, add the attribute
+      CGF.CurFn->addFnAttr("has-nvptx-shared-depot");
       // Prepare for parallel region. Indicate the outlined function.
       Address SharedArgs =
           CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy,


Index: test/OpenMP/nvptx_data_sharing.cpp
===================================================================
--- test/OpenMP/nvptx_data_sharing.cpp
+++ test/OpenMP/nvptx_data_sharing.cpp
@@ -22,15 +22,15 @@
 
 /// ========= In the worker function ========= ///
 
-// CK1: define internal void @__omp_offloading_{{.*}}test_ds{{.*}}worker(){{.*}}{
+// CK1: define internal void @__omp_offloading_{{.*}}test_ds{{.*}}worker() #0
 // CK1: [[SHAREDARGS:%.+]] = alloca i8**
 // CK1: call i1 @__kmpc_kernel_parallel(i8** %work_fn, i8*** [[SHAREDARGS]])
 // CK1: [[SHARGSTMP:%.+]] = load i8**, i8*** [[SHAREDARGS]]
 // CK1: call void @__omp_outlined___wrapper{{.*}}({{.*}}, i8** [[SHARGSTMP]])
 
 /// ========= In the kernel function ========= ///
 
-// CK1: {{.*}}define void @__omp_offloading{{.*}}test_ds{{.*}}()
+// CK1: {{.*}}define void @__omp_offloading{{.*}}test_ds{{.*}}() #1
 // CK1: [[SHAREDARGS1:%.+]] = alloca i8**
 // CK1: call void @__kmpc_kernel_prepare_parallel({{.*}}, i8*** [[SHAREDARGS1]], i32 1)
 // CK1: [[SHARGSTMP1:%.+]] = load i8**, i8*** [[SHAREDARGS1]]
@@ -40,13 +40,17 @@
 
 /// ========= In the data sharing wrapper function ========= ///
 
-// CK1: {{.*}}define internal void @__omp_outlined___wrapper({{.*}}i8**){{.*}}{
+// CK1: {{.*}}define internal void @__omp_outlined___wrapper({{.*}}i8**) #0
 // CK1: [[SHAREDARGS2:%.+]] = alloca i8**
 // CK1: store i8** %2, i8*** [[SHAREDARGS2]]
 // CK1: [[SHARGSTMP3:%.+]] = load i8**, i8*** [[SHAREDARGS2]]
 // CK1: [[SHARGSTMP4:%.+]] = getelementptr inbounds i8*, i8** [[SHARGSTMP3]]
 // CK1: [[SHARGSTMP5:%.+]] = bitcast i8** [[SHARGSTMP4]] to i32**
 // CK1: [[SHARGSTMP6:%.+]] = load i32*, i32** [[SHARGSTMP5]]
 // CK1: call void @__omp_outlined__({{.*}}, i32* [[SHARGSTMP6]])
 
+/// ========= Attribute must contain
+// CK1-NOT: attributes #0 = { {{.*}}"has-nvptx-shared-depot"{{.*}} }
+// CK1: attributes #1 = { {{.*}}"has-nvptx-shared-depot"{{.*}} }
+
 #endif
Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -934,6 +934,8 @@
     llvm::Value *ID = Bld.CreateBitOrPointerCast(WFn, CGM.Int8PtrTy);
 
     if (!CapturedVars.empty()) {
+      // There's somehting to share, add the attribute
+      CGF.CurFn->addFnAttr("has-nvptx-shared-depot");
       // Prepare for parallel region. Indicate the outlined function.
       Address SharedArgs =
           CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy,
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
  • [PATCH] D40451: [Op... Gheorghe-Teodor Bercea via Phabricator via cfe-commits

Reply via email to