Author: boxu.zhang Date: 2023-07-13T16:54:57-07:00 New Revision: f05b58a9468cc2990678e06bc51df56b30344807
URL: https://github.com/llvm/llvm-project/commit/f05b58a9468cc2990678e06bc51df56b30344807 DIFF: https://github.com/llvm/llvm-project/commit/f05b58a9468cc2990678e06bc51df56b30344807.diff LOG: [clang] Support '-fgpu-default-stream=per-thread' for NVIDIA CUDA I'm using clang to compile CUDA code, and I found that clang doesn't support the per-thread default stream option for NVIDIA CUDA. I don't know if there is another solution. Reviewed By: tra Differential Revision: https://reviews.llvm.org/D154822 Added: Modified: clang/lib/CodeGen/CGCUDANV.cpp clang/lib/Frontend/InitPreprocessor.cpp clang/test/CodeGenCUDA/Inputs/cuda.h clang/test/CodeGenCUDA/kernel-call.cu Removed: ################################################################################ diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp index e78fe175855e75..08769c98dc298a 100644 --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -358,9 +358,13 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF, TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl(); DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl); std::string KernelLaunchAPI = "LaunchKernel"; - if (CGF.getLangOpts().HIP && CGF.getLangOpts().GPUDefaultStream == - LangOptions::GPUDefaultStreamKind::PerThread) - KernelLaunchAPI = KernelLaunchAPI + "_spt"; + if (CGF.getLangOpts().GPUDefaultStream == + LangOptions::GPUDefaultStreamKind::PerThread) { + if (CGF.getLangOpts().HIP) + KernelLaunchAPI = KernelLaunchAPI + "_spt"; + else if (CGF.getLangOpts().CUDA) + KernelLaunchAPI = KernelLaunchAPI + "_ptsz"; + } auto LaunchKernelName = addPrefixToName(KernelLaunchAPI); IdentifierInfo &cudaLaunchKernelII = CGM.getContext().Idents.get(LaunchKernelName); diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index 9a83bec3166001..16dd0c01bcb443 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ 
b/clang/lib/Frontend/InitPreprocessor.cpp @@ -574,6 +574,9 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI, Builder.defineMacro("__CLANG_RDC__"); if (!LangOpts.HIP) Builder.defineMacro("__CUDA__"); + if (LangOpts.GPUDefaultStream == + LangOptions::GPUDefaultStreamKind::PerThread) + Builder.defineMacro("CUDA_API_PER_THREAD_DEFAULT_STREAM"); } if (LangOpts.HIP) { Builder.defineMacro("__HIP__"); diff --git a/clang/test/CodeGenCUDA/Inputs/cuda.h b/clang/test/CodeGenCUDA/Inputs/cuda.h index 25f64ccefe9375..06399659c0b53e 100644 --- a/clang/test/CodeGenCUDA/Inputs/cuda.h +++ b/clang/test/CodeGenCUDA/Inputs/cuda.h @@ -58,6 +58,10 @@ extern "C" int __cudaPushCallConfiguration(dim3 gridSize, dim3 blockSize, extern "C" cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, void **args, size_t sharedMem, cudaStream_t stream); +extern "C" cudaError_t cudaLaunchKernel_ptsz(const void *func, dim3 gridDim, + dim3 blockDim, void **args, + size_t sharedMem, cudaStream_t stream); + #endif extern "C" __device__ int printf(const char*, ...); diff --git a/clang/test/CodeGenCUDA/kernel-call.cu b/clang/test/CodeGenCUDA/kernel-call.cu index 40407f1c29a38c..687c55a78e0047 100644 --- a/clang/test/CodeGenCUDA/kernel-call.cu +++ b/clang/test/CodeGenCUDA/kernel-call.cu @@ -2,6 +2,9 @@ // RUN: | FileCheck %s --check-prefixes=CUDA-OLD,CHECK // RUN: %clang_cc1 -target-sdk-version=9.2 -emit-llvm %s -o - \ // RUN: | FileCheck %s --check-prefixes=CUDA-NEW,CHECK +// RUN: %clang_cc1 -target-sdk-version=9.2 -emit-llvm %s -o - \ +// RUN: -fgpu-default-stream=per-thread -DCUDA_API_PER_THREAD_DEFAULT_STREAM \ +// RUN: | FileCheck %s --check-prefixes=CUDA-PTH,CHECK // RUN: %clang_cc1 -x hip -emit-llvm %s -o - \ // RUN: | FileCheck %s --check-prefixes=HIP-OLD,CHECK // RUN: %clang_cc1 -fhip-new-launch-api -x hip -emit-llvm %s -o - \ @@ -25,6 +28,7 @@ // CUDA-OLD: call{{.*}}cudaLaunch // CUDA-NEW: call{{.*}}__cudaPopCallConfiguration // CUDA-NEW: 
call{{.*}}cudaLaunchKernel +// CUDA-PTH: call{{.*}}cudaLaunchKernel_ptsz __global__ void g1(int x) {} // CHECK-LABEL: define{{.*}}main _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits