https://github.com/Artem-B updated https://github.com/llvm/llvm-project/pull/112028
>From 5dac14aab180fd965d996b47cf983b8c462fe703 Mon Sep 17 00:00:00 2001 From: Sergey Kozub <sko...@nvidia.com> Date: Tue, 2 Jul 2024 02:44:56 -0700 Subject: [PATCH 1/2] [CUDA] Add support for CUDA-12.6 and sm_100 --- clang/docs/ReleaseNotes.rst | 2 ++ clang/include/clang/Basic/BuiltinsNVPTX.def | 10 ++++++++-- clang/include/clang/Basic/Cuda.h | 4 +++- clang/lib/Basic/Cuda.cpp | 4 ++++ clang/lib/Basic/Targets/NVPTX.cpp | 2 ++ clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 1 + clang/lib/Driver/ToolChains/Cuda.cpp | 3 +++ clang/test/Misc/target-invalid-cpu-note/nvptx.c | 1 + llvm/lib/Target/NVPTX/NVPTX.td | 5 +++-- 9 files changed, 27 insertions(+), 5 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 763bc3ac159322..1722397452a80a 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -612,6 +612,8 @@ CUDA/HIP Language Changes CUDA Support ^^^^^^^^^^^^ +- Clang now supports CUDA SDK up to 12.6 +- Added support for sm_100 AIX Support ^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def index 6b7bce5bc00d4f..969dd9e41ebfa3 100644 --- a/clang/include/clang/Basic/BuiltinsNVPTX.def +++ b/clang/include/clang/Basic/BuiltinsNVPTX.def @@ -27,8 +27,10 @@ #pragma push_macro("SM_89") #pragma push_macro("SM_90") #pragma push_macro("SM_90a") +#pragma push_macro("SM_100") +#define SM_100 "sm_100" #define SM_90a "sm_90a" -#define SM_90 "sm_90|" SM_90a +#define SM_90 "sm_90|" SM_90a "|" SM_100 #define SM_89 "sm_89|" SM_90 #define SM_87 "sm_87|" SM_89 #define SM_86 "sm_86|" SM_87 @@ -63,7 +65,9 @@ #pragma push_macro("PTX83") #pragma push_macro("PTX84") #pragma push_macro("PTX85") -#define PTX85 "ptx85" +#pragma push_macro("PTX86") +#define PTX86 "ptx86" +#define PTX85 "ptx85|" PTX86 #define PTX84 "ptx84|" PTX85 #define PTX83 "ptx83|" PTX84 #define PTX82 "ptx82|" PTX83 @@ -1086,6 +1090,7 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78)) #pragma pop_macro("SM_89") #pragma pop_macro("SM_90") #pragma pop_macro("SM_90a") +#pragma pop_macro("SM_100") #pragma pop_macro("PTX42") #pragma pop_macro("PTX60") #pragma pop_macro("PTX61") @@ -1108,3 +1113,4 @@ TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78)) #pragma pop_macro("PTX83") #pragma pop_macro("PTX84") #pragma pop_macro("PTX85") +#pragma pop_macro("PTX86") diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h index 83699f8897f663..a18e62620dd5d0 100644 --- a/clang/include/clang/Basic/Cuda.h +++ b/clang/include/clang/Basic/Cuda.h @@ -43,9 +43,10 @@ enum class CudaVersion { CUDA_123, CUDA_124, CUDA_125, + CUDA_126, FULLY_SUPPORTED = CUDA_123, PARTIALLY_SUPPORTED = - CUDA_125, // Partially supported. Proceed with a warning. + CUDA_126, // Partially supported. Proceed with a warning. NEW = 10000, // Too new. Issue a warning, but allow using it. }; const char *CudaVersionToString(CudaVersion V); @@ -78,6 +79,7 @@ enum class OffloadArch { SM_89, SM_90, SM_90a, + SM_100, GFX600, GFX601, GFX602, diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp index faf3878f064d20..72d9bd89c36e71 100644 --- a/clang/lib/Basic/Cuda.cpp +++ b/clang/lib/Basic/Cuda.cpp @@ -43,6 +43,7 @@ static const CudaVersionMapEntry CudaNameVersionMap[] = { CUDA_ENTRY(12, 3), CUDA_ENTRY(12, 4), CUDA_ENTRY(12, 5), + CUDA_ENTRY(12, 6), {"", CudaVersion::NEW, llvm::VersionTuple(std::numeric_limits<int>::max())}, {"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone. }; @@ -96,6 +97,7 @@ static const OffloadArchToStringMap arch_names[] = { SM(89), // Ada Lovelace SM(90), // Hopper SM(90a), // Hopper + SM(100), // Blackwell GFX(600), // gfx600 GFX(601), // gfx601 GFX(602), // gfx602 @@ -221,6 +223,8 @@ CudaVersion MinVersionForOffloadArch(OffloadArch A) { return CudaVersion::CUDA_118; case OffloadArch::SM_90a: return CudaVersion::CUDA_120; + case OffloadArch::SM_100: + return CudaVersion::CUDA_126; default: llvm_unreachable("invalid enum"); } diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index 43b653dc52ce0d..88a0dbde52d52b 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -281,6 +281,8 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, case OffloadArch::SM_90: case OffloadArch::SM_90a: return "900"; + case OffloadArch::SM_100: + return "1000"; } llvm_unreachable("unhandled OffloadArch"); }(); diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index c920d93957b16e..35ff75416cb776 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -2274,6 +2274,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) { case OffloadArch::SM_89: case OffloadArch::SM_90: case OffloadArch::SM_90a: + case OffloadArch::SM_100: case OffloadArch::GFX600: case OffloadArch::GFX601: case OffloadArch::GFX602: diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 7a70cf1c5694fd..c1beca840dde5b 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -87,6 +87,8 @@ CudaVersion getCudaVersion(uint32_t raw_version) { return CudaVersion::CUDA_124; if (raw_version < 12060) return CudaVersion::CUDA_125; + if (raw_version < 12070) + return CudaVersion::CUDA_126; return CudaVersion::NEW; } @@ -669,6 +671,7 @@ void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple, case CudaVersion::CUDA_##CUDA_VER: \ PtxFeature = "+ptx" #PTX_VER; \ break; + CASE_CUDA_VERSION(126, 86); CASE_CUDA_VERSION(125, 85); CASE_CUDA_VERSION(124, 84); CASE_CUDA_VERSION(123, 83); diff --git a/clang/test/Misc/target-invalid-cpu-note/nvptx.c b/clang/test/Misc/target-invalid-cpu-note/nvptx.c index af4ccff6b07b6c..43524ab2906bf9 100644 --- a/clang/test/Misc/target-invalid-cpu-note/nvptx.c +++ b/clang/test/Misc/target-invalid-cpu-note/nvptx.c @@ -26,6 +26,7 @@ // CHECK-SAME: {{^}}, sm_89 // CHECK-SAME: {{^}}, sm_90 // CHECK-SAME: {{^}}, sm_90a +// CHECK-SAME: {{^}}, sm_100 // CHECK-SAME: {{^}}, gfx600 // CHECK-SAME: {{^}}, gfx601 // CHECK-SAME: {{^}}, gfx602 diff --git a/llvm/lib/Target/NVPTX/NVPTX.td b/llvm/lib/Target/NVPTX/NVPTX.td index bb4549a5e60782..9af8715ef52ae7 100644 --- a/llvm/lib/Target/NVPTX/NVPTX.td +++ b/llvm/lib/Target/NVPTX/NVPTX.td @@ -35,14 +35,14 @@ class FeaturePTX<int version>: "Use PTX version " # version>; foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53, - 60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90] in + 60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90, 100] in def SM#sm: FeatureSM<""#sm, !mul(sm, 10)>; def SM90a: FeatureSM<"90a", 901>; foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65, 70, 71, 72, 73, 74, 75, 76, 77, 78, - 80, 81, 82, 83, 84, 85] in + 80, 81, 82, 83, 84, 85, 86] in def PTX#version: FeaturePTX<version>; //===----------------------------------------------------------------------===// @@ -73,6 +73,7 @@ def : Proc<"sm_87", [SM87, PTX74]>; def : Proc<"sm_89", [SM89, PTX78]>; def : Proc<"sm_90", [SM90, PTX78]>; def : Proc<"sm_90a", [SM90a, PTX80]>; +def : Proc<"sm_100", [SM100, PTX86]>; def NVPTXInstrInfo : InstrInfo { } >From aee63aee2c275d20596e9637905573815c01adee Mon Sep 17 00:00:00 2001 From: Artem Belevich <t...@google.com> Date: Mon, 14 Oct 2024 10:43:56 -0700 Subject: [PATCH 2/2] Use ptx 8.5 with cuda-12.6, and enable ptx 8.6 with the newer CUDA versions --- clang/lib/Basic/Cuda.cpp | 2 +- clang/lib/Driver/ToolChains/Cuda.cpp | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp index 72d9bd89c36e71..c757bb7a165982 100644 --- a/clang/lib/Basic/Cuda.cpp +++ b/clang/lib/Basic/Cuda.cpp @@ -224,7 +224,7 @@ CudaVersion MinVersionForOffloadArch(OffloadArch A) { case OffloadArch::SM_90a: return CudaVersion::CUDA_120; case OffloadArch::SM_100: - return CudaVersion::CUDA_126; + return CudaVersion::NEW; // TODO: use specific CUDA version once it's public. default: llvm_unreachable("invalid enum"); } diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index c1beca840dde5b..0a401fa5afcbb0 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -671,7 +671,11 @@ void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple, case CudaVersion::CUDA_##CUDA_VER: \ PtxFeature = "+ptx" #PTX_VER; \ break; - CASE_CUDA_VERSION(126, 86); + // TODO: Use specific CUDA version once it's public. + case clang::CudaVersion::NEW: + PtxFeature = "+ptx86"; + break; + CASE_CUDA_VERSION(126, 85); CASE_CUDA_VERSION(125, 85); CASE_CUDA_VERSION(124, 84); CASE_CUDA_VERSION(123, 83); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits