gtbercea created this revision.
Herald added a subscriber: jholewinski.

Since OpenMP and CUDA share the same toolchain, we need to disable:

- the lowering of variables to shared memory in the LLVM NVPTX backend
- the emission of the shared depot
- the emission of shared stack pointers

when compiling:

- CUDA programs
- OpenMP programs that do not require data sharing.


Repository:
  rL LLVM

https://reviews.llvm.org/D40451

Files:
  lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
  test/OpenMP/nvptx_data_sharing.cpp


Index: test/OpenMP/nvptx_data_sharing.cpp
===================================================================
--- test/OpenMP/nvptx_data_sharing.cpp
+++ test/OpenMP/nvptx_data_sharing.cpp
@@ -22,15 +22,15 @@

 /// ========= In the worker function ========= ///

-// CK1: define internal void @__omp_offloading_{{.*}}test_ds{{.*}}worker(){{.*}}{
+// CK1: define internal void @__omp_offloading_{{.*}}test_ds{{.*}}worker() #0
 // CK1: [[SHAREDARGS:%.+]] = alloca i8**
 // CK1: call i1 @__kmpc_kernel_parallel(i8** %work_fn, i8*** [[SHAREDARGS]])
 // CK1: [[SHARGSTMP:%.+]] = load i8**, i8*** [[SHAREDARGS]]
 // CK1: call void @__omp_outlined___wrapper{{.*}}({{.*}}, i8** [[SHARGSTMP]])

 /// ========= In the kernel function ========= ///

-// CK1: {{.*}}define void @__omp_offloading{{.*}}test_ds{{.*}}()
+// CK1: {{.*}}define void @__omp_offloading{{.*}}test_ds{{.*}}() #1
 // CK1: [[SHAREDARGS1:%.+]] = alloca i8**
 // CK1: call void @__kmpc_kernel_prepare_parallel({{.*}}, i8*** [[SHAREDARGS1]], i32 1)
 // CK1: [[SHARGSTMP1:%.+]] = load i8**, i8*** [[SHAREDARGS1]]
@@ -40,13 +40,17 @@

 /// ========= In the data sharing wrapper function ========= ///

-// CK1: {{.*}}define internal void @__omp_outlined___wrapper({{.*}}i8**){{.*}}{
+// CK1: {{.*}}define internal void @__omp_outlined___wrapper({{.*}}i8**) #0
 // CK1: [[SHAREDARGS2:%.+]] = alloca i8**
 // CK1: store i8** %2, i8*** [[SHAREDARGS2]]
 // CK1: [[SHARGSTMP3:%.+]] = load i8**, i8*** [[SHAREDARGS2]]
 // CK1: [[SHARGSTMP4:%.+]] = getelementptr inbounds i8*, i8** [[SHARGSTMP3]]
 // CK1: [[SHARGSTMP5:%.+]] = bitcast i8** [[SHARGSTMP4]] to i32**
 // CK1: [[SHARGSTMP6:%.+]] = load i32*, i32** [[SHARGSTMP5]]
 // CK1: call void @__omp_outlined__({{.*}}, i32* [[SHARGSTMP6]])

+/// ========= Attribute checks ========= ///
+// CK1-NOT: attributes #0 = { {{.*}}"has-nvptx-shared-depot"{{.*}} }
+// CK1: attributes #1 = { {{.*}}"has-nvptx-shared-depot"{{.*}} }
+
 #endif
Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -934,6 +934,8 @@
     llvm::Value *ID = Bld.CreateBitOrPointerCast(WFn, CGM.Int8PtrTy);

     if (!CapturedVars.empty()) {
+      // There's something to share, add the attribute.
+      CGF.CurFn->addFnAttr("has-nvptx-shared-depot");
       // Prepare for parallel region. Indicate the outlined function.
       Address SharedArgs =
           CGF.CreateDefaultAlignTempAlloca(CGF.VoidPtrPtrTy,