llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang-driver

Author: Artem Belevich (Artem-B)

<details>
<summary>Changes</summary>

Implements #172937

https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/#virtual-architecture-macros

> The architecture list macro `__CUDA_ARCH_LIST__` is a list of comma-separated `__CUDA_ARCH__` values for each of the virtual architectures specified in the compiler invocation. The list is sorted in numerically ascending order.

Note that unlike NVCC, which defines the macro for all C/C++/CUDA compilations done with nvcc, clang defines the macro *only* for CUDA compilations.
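For illustration only (not part of this patch; the file name, array name, and the exact `--offload-arch` values below are assumptions), CUDA code could expand the list directly into an array initializer, something like this minimal sketch:

```cuda
// arch_list.cu -- hypothetical example, assuming it is built as CUDA code with
//   clang++ -x cuda --offload-arch=sm_60 --offload-arch=sm_70 arch_list.cu -o arch_list
#include <cstdio>

#ifndef __CUDA_ARCH_LIST__
#error "expected a CUDA compilation; clang defines __CUDA_ARCH_LIST__ only for CUDA"
#endif

// With the flags above, __CUDA_ARCH_LIST__ expands to the sorted,
// comma-separated token sequence 600,700, so wrapping it in braces
// turns it into an ordinary array initializer.
constexpr int kCudaArchList[] = {__CUDA_ARCH_LIST__};

int main() {
  for (int Arch : kCudaArchList)
    std::printf("this TU was compiled for sm_%d\n", Arch / 10);
  return 0;
}
```

Both the host and the device subcompilations would see the same list, matching the nvcc behavior quoted above.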
---
Full diff: https://github.com/llvm/llvm-project/pull/175260.diff

5 Files Affected:

- (modified) clang/include/clang/Basic/Cuda.h (+6)
- (modified) clang/lib/Basic/Cuda.cpp (+84)
- (modified) clang/lib/Basic/Targets/NVPTX.cpp (+8-145)
- (modified) clang/lib/Driver/ToolChains/Clang.cpp (+22)
- (added) clang/test/Driver/cuda-arch-list.cu (+56)


``````````diff
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index 5639710f43aa5..78fc32295c88c 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -84,6 +84,12 @@ CudaVersion ToCudaVersion(llvm::VersionTuple);
 bool CudaFeatureEnabled(llvm::VersionTuple, CudaFeature);
 bool CudaFeatureEnabled(CudaVersion, CudaFeature);
 
+/// Get the numeric ID (e.g. 700) of a CUDA architecture.
+unsigned CudaArchToID(OffloadArch Arch);
+
+/// Check if the CUDA architecture is an accelerated variant (e.g. sm_90a).
+bool IsNVIDIAAcceleratedOffloadArch(OffloadArch Arch);
+
 } // namespace clang
 
 #endif
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index 51769eb425923..514fa2f2a4ca7 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -173,4 +173,88 @@ bool CudaFeatureEnabled(CudaVersion Version, CudaFeature Feature) {
   }
   llvm_unreachable("Unknown CUDA feature.");
 }
+
+unsigned CudaArchToID(OffloadArch Arch) {
+  switch (Arch) {
+  case OffloadArch::SM_20:
+    return 200;
+  case OffloadArch::SM_21:
+    return 210;
+  case OffloadArch::SM_30:
+    return 300;
+  case OffloadArch::SM_32_:
+    return 320;
+  case OffloadArch::SM_35:
+    return 350;
+  case OffloadArch::SM_37:
+    return 370;
+  case OffloadArch::SM_50:
+    return 500;
+  case OffloadArch::SM_52:
+    return 520;
+  case OffloadArch::SM_53:
+    return 530;
+  case OffloadArch::SM_60:
+    return 600;
+  case OffloadArch::SM_61:
+    return 610;
+  case OffloadArch::SM_62:
+    return 620;
+  case OffloadArch::SM_70:
+    return 700;
+  case OffloadArch::SM_72:
+    return 720;
+  case OffloadArch::SM_75:
+    return 750;
+  case OffloadArch::SM_80:
+    return 800;
+  case OffloadArch::SM_86:
+    return 860;
+  case OffloadArch::SM_87:
+    return 870;
+  case OffloadArch::SM_88:
+    return 880;
+  case OffloadArch::SM_89:
+    return 890;
+  case OffloadArch::SM_90:
+  case OffloadArch::SM_90a:
+    return 900;
+  case OffloadArch::SM_100:
+  case OffloadArch::SM_100a:
+    return 1000;
+  case OffloadArch::SM_101:
+  case OffloadArch::SM_101a:
+    return 1010;
+  case OffloadArch::SM_103:
+  case OffloadArch::SM_103a:
+    return 1030;
+  case OffloadArch::SM_110:
+  case OffloadArch::SM_110a:
+    return 1100;
+  case OffloadArch::SM_120:
+  case OffloadArch::SM_120a:
+    return 1200;
+  case OffloadArch::SM_121:
+  case OffloadArch::SM_121a:
+    return 1210;
+  default:
+    break;
+  }
+  llvm_unreachable("invalid NVIDIA GPU architecture");
+}
+
+bool IsNVIDIAAcceleratedOffloadArch(OffloadArch Arch) {
+  switch (Arch) {
+  case OffloadArch::SM_90a:
+  case OffloadArch::SM_100a:
+  case OffloadArch::SM_101a:
+  case OffloadArch::SM_103a:
+  case OffloadArch::SM_110a:
+  case OffloadArch::SM_120a:
+  case OffloadArch::SM_121a:
+    return true;
+  default:
+    return false;
+  }
+}
 } // namespace clang
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index 06db3aae0c755..5b399b2a5a080 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -174,155 +174,18 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
   Builder.defineMacro("__NVPTX__");
 
   // Skip setting architecture dependent macros if undefined.
-  if (GPU == OffloadArch::UNUSED && !HostTarget)
+  if (!IsNVIDIAOffloadArch(GPU))
     return;
 
   if (Opts.CUDAIsDevice || Opts.OpenMPIsTargetDevice || !HostTarget) {
     // Set __CUDA_ARCH__ for the GPU specified.
-    llvm::StringRef CUDAArchCode = [this] {
-      switch (GPU) {
-      case OffloadArch::GFX600:
-      case OffloadArch::GFX601:
-      case OffloadArch::GFX602:
-      case OffloadArch::GFX700:
-      case OffloadArch::GFX701:
-      case OffloadArch::GFX702:
-      case OffloadArch::GFX703:
-      case OffloadArch::GFX704:
-      case OffloadArch::GFX705:
-      case OffloadArch::GFX801:
-      case OffloadArch::GFX802:
-      case OffloadArch::GFX803:
-      case OffloadArch::GFX805:
-      case OffloadArch::GFX810:
-      case OffloadArch::GFX9_GENERIC:
-      case OffloadArch::GFX900:
-      case OffloadArch::GFX902:
-      case OffloadArch::GFX904:
-      case OffloadArch::GFX906:
-      case OffloadArch::GFX908:
-      case OffloadArch::GFX909:
-      case OffloadArch::GFX90a:
-      case OffloadArch::GFX90c:
-      case OffloadArch::GFX9_4_GENERIC:
-      case OffloadArch::GFX942:
-      case OffloadArch::GFX950:
-      case OffloadArch::GFX10_1_GENERIC:
-      case OffloadArch::GFX1010:
-      case OffloadArch::GFX1011:
-      case OffloadArch::GFX1012:
-      case OffloadArch::GFX1013:
-      case OffloadArch::GFX10_3_GENERIC:
-      case OffloadArch::GFX1030:
-      case OffloadArch::GFX1031:
-      case OffloadArch::GFX1032:
-      case OffloadArch::GFX1033:
-      case OffloadArch::GFX1034:
-      case OffloadArch::GFX1035:
-      case OffloadArch::GFX1036:
-      case OffloadArch::GFX11_GENERIC:
-      case OffloadArch::GFX1100:
-      case OffloadArch::GFX1101:
-      case OffloadArch::GFX1102:
-      case OffloadArch::GFX1103:
-      case OffloadArch::GFX1150:
-      case OffloadArch::GFX1151:
-      case OffloadArch::GFX1152:
-      case OffloadArch::GFX1153:
-      case OffloadArch::GFX12_GENERIC:
-      case OffloadArch::GFX1200:
-      case OffloadArch::GFX1201:
-      case OffloadArch::GFX1250:
-      case OffloadArch::GFX1251:
-      case OffloadArch::AMDGCNSPIRV:
-      case OffloadArch::Generic:
-      case OffloadArch::GRANITERAPIDS:
-      case OffloadArch::BMG_G21:
-      case OffloadArch::LAST:
-        break;
-      case OffloadArch::UNKNOWN:
-        assert(false && "No GPU arch when compiling CUDA device code.");
-        return "";
-      case OffloadArch::UNUSED:
-      case OffloadArch::SM_20:
-        return "200";
-      case OffloadArch::SM_21:
-        return "210";
-      case OffloadArch::SM_30:
-        return "300";
-      case OffloadArch::SM_32_:
-        return "320";
-      case OffloadArch::SM_35:
-        return "350";
-      case OffloadArch::SM_37:
-        return "370";
-      case OffloadArch::SM_50:
-        return "500";
-      case OffloadArch::SM_52:
-        return "520";
-      case OffloadArch::SM_53:
-        return "530";
-      case OffloadArch::SM_60:
-        return "600";
-      case OffloadArch::SM_61:
-        return "610";
-      case OffloadArch::SM_62:
-        return "620";
-      case OffloadArch::SM_70:
-        return "700";
-      case OffloadArch::SM_72:
-        return "720";
-      case OffloadArch::SM_75:
-        return "750";
-      case OffloadArch::SM_80:
-        return "800";
-      case OffloadArch::SM_86:
-        return "860";
-      case OffloadArch::SM_87:
-        return "870";
-      case OffloadArch::SM_88:
-        return "880";
-      case OffloadArch::SM_89:
-        return "890";
-      case OffloadArch::SM_90:
-      case OffloadArch::SM_90a:
-        return "900";
-      case OffloadArch::SM_100:
-      case OffloadArch::SM_100a:
-        return "1000";
-      case OffloadArch::SM_101:
-      case OffloadArch::SM_101a:
-        return "1010";
-      case OffloadArch::SM_103:
-      case OffloadArch::SM_103a:
-        return "1030";
-      case OffloadArch::SM_110:
-      case OffloadArch::SM_110a:
-        return "1100";
-      case OffloadArch::SM_120:
-      case OffloadArch::SM_120a:
-        return "1200";
-      case OffloadArch::SM_121:
-      case OffloadArch::SM_121a:
-        return "1210";
-      }
-      llvm_unreachable("unhandled OffloadArch");
-    }();
-    Builder.defineMacro("__CUDA_ARCH__", CUDAArchCode);
-    switch(GPU) {
-    case OffloadArch::SM_90a:
-    case OffloadArch::SM_100a:
-    case OffloadArch::SM_101a:
-    case OffloadArch::SM_103a:
-    case OffloadArch::SM_110a:
-    case OffloadArch::SM_120a:
-    case OffloadArch::SM_121a:
-      Builder.defineMacro("__CUDA_ARCH_FEAT_SM" + CUDAArchCode.drop_back() + "_ALL", "1");
-      break;
-    default:
-      // Do nothing if this is not an enhanced architecture.
-      break;
-    }
+    unsigned ArchID = CudaArchToID(GPU);
+    Builder.defineMacro("__CUDA_ARCH__", llvm::Twine(ArchID));
+
+    if (IsNVIDIAAcceleratedOffloadArch(GPU))
+      Builder.defineMacro("__CUDA_ARCH_FEAT_SM" + llvm::Twine(ArchID / 10) +
+                              "_ALL",
+                          "1");
   }
 }
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 4399eb475be75..4ca98600d6e93 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -1069,6 +1069,28 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
                   {options::OPT_D, options::OPT_U, options::OPT_I_Group,
                    options::OPT_F, options::OPT_embed_dir_EQ});
 
+  if (C.isOffloadingHostKind(Action::OFK_Cuda) ||
+      JA.isDeviceOffloading(Action::OFK_Cuda)) {
+    // Collect all enabled NVPTX architectures.
+    std::set<unsigned> ArchIDs;
+    for (auto &I : llvm::make_range(C.getOffloadToolChains(Action::OFK_Cuda))) {
+      const ToolChain *TC = I.second;
+      for (StringRef Arch :
+           D.getOffloadArchs(C, C.getArgs(), Action::OFK_Cuda, *TC)) {
+        OffloadArch OA = StringToOffloadArch(Arch);
+        if (IsNVIDIAOffloadArch(OA))
+          ArchIDs.insert(CudaArchToID(OA));
+      }
+    }
+
+    if (!ArchIDs.empty()) {
+      SmallString<128> List;
+      llvm::raw_svector_ostream OS(List);
+      llvm::interleave(ArchIDs, OS, ",");
+      CmdArgs.push_back(Args.MakeArgString("-D__CUDA_ARCH_LIST__=" + List));
+    }
+  }
+
   // Add -Wp, and -Xpreprocessor if using the preprocessor.
 
   // FIXME: There is a very unfortunate problem here, some troubled
diff --git a/clang/test/Driver/cuda-arch-list.cu b/clang/test/Driver/cuda-arch-list.cu
new file mode 100644
index 0000000000000..84efeb1499708
--- /dev/null
+++ b/clang/test/Driver/cuda-arch-list.cu
@@ -0,0 +1,56 @@
+// Checks that __CUDA_ARCH_LIST__ is defined correctly for both host and device
+// subcompilations.
+
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nocudainc -nocudalib \
+// RUN:   --offload-arch=sm_60 %s 2>&1 \
+// RUN:   | FileCheck -check-prefixes=DEVICE60,HOST %s
+
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nocudainc -nocudalib \
+// RUN:   --offload-arch=sm_60 --offload-arch=sm_70 %s 2>&1 \
+// RUN:   | FileCheck -check-prefixes=DEVICE60-60-70,DEVICE70-60-70,HOST-60-70 %s
+
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nocudainc -nocudalib \
+// RUN:   --offload-arch=sm_70 --offload-arch=sm_60 %s 2>&1 \
+// RUN:   | FileCheck -check-prefixes=DEVICE60-60-70,DEVICE70-60-70,HOST-60-70 %s
+
+// Verify that it works with no explicit arch (defaults to sm_52)
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nocudainc -nocudalib \
+// RUN:   --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
+// RUN:   | FileCheck -check-prefixes=DEVICE52,HOST52 %s
+
+// Verify that --no-offload-arch negates preceding --offload-arch
+// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nocudainc -nocudalib \
+// RUN:   --offload-arch=sm_60 --offload-arch=sm_70 --no-offload-arch=sm_60 %s 2>&1 \
+// RUN:   | FileCheck -check-prefixes=DEVICE70-ONLY,HOST70-ONLY %s
+
+// DEVICE60: "-cc1" "-triple" "nvptx64-nvidia-cuda"
+// DEVICE60-SAME: "-target-cpu" "sm_60"
+// DEVICE60-SAME: "-D__CUDA_ARCH_LIST__=600"
+
+// HOST: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
+// HOST-SAME: "-D__CUDA_ARCH_LIST__=600"
+
+// DEVICE60-60-70: "-cc1" "-triple" "nvptx64-nvidia-cuda"
+// DEVICE60-60-70-SAME: "-target-cpu" "sm_60"
+// DEVICE60-60-70-SAME: "-D__CUDA_ARCH_LIST__=600,700"
+
+// DEVICE70-60-70: "-cc1" "-triple" "nvptx64-nvidia-cuda"
+// DEVICE70-60-70-SAME: "-target-cpu" "sm_70"
+// DEVICE70-60-70-SAME: "-D__CUDA_ARCH_LIST__=600,700"
+
+// HOST-60-70: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
+// HOST-60-70-SAME: "-D__CUDA_ARCH_LIST__=600,700"
+
+// DEVICE52: "-cc1" "-triple" "nvptx64-nvidia-cuda"
+// DEVICE52-SAME: "-target-cpu" "sm_52"
+// DEVICE52-SAME: "-D__CUDA_ARCH_LIST__=520"
+
+// HOST52: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
+// HOST52-SAME: "-D__CUDA_ARCH_LIST__=520"
+
+// DEVICE70-ONLY: "-cc1" "-triple" "nvptx64-nvidia-cuda"
+// DEVICE70-ONLY-SAME: "-target-cpu" "sm_70"
+// DEVICE70-ONLY-SAME: "-D__CUDA_ARCH_LIST__=700"
+
+// HOST70-ONLY: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
+// HOST70-ONLY-SAME: "-D__CUDA_ARCH_LIST__=700"
``````````

</details>

https://github.com/llvm/llvm-project/pull/175260
