Author: Alex Voicu Date: 2024-06-25T12:19:28+01:00 New Revision: 9acb533c38be833ec1d8daa06e127a9de8f0a5ef
URL: https://github.com/llvm/llvm-project/commit/9acb533c38be833ec1d8daa06e127a9de8f0a5ef DIFF: https://github.com/llvm/llvm-project/commit/9acb533c38be833ec1d8daa06e127a9de8f0a5ef.diff LOG: [clang][Driver] Add HIPAMD Driver support for AMDGCN flavoured SPIR-V (#95061) This patch augments the HIPAMD driver to allow it to target AMDGCN flavoured SPIR-V compilation. It's mostly straightforward, as we re-use some of the existing SPIRV infra; however, there are a few notable additions: - we introduce an `amdgcnspirv` offload arch, rather than relying on using `generic` (this is already fairly overloaded) or simply using `spirv` or `spirv64` (we'll want to use these to denote unflavoured SPIRV, once we bring up that capability) - initially it won't be possible to mix-in SPIR-V and concrete AMDGPU targets, as it would require some relatively intrusive surgery in the HIPAMD Toolchain and the Driver to deal with two triples (`spirv64-amd-amdhsa` and `amdgcn-amd-amdhsa`, respectively) - in order to retain user provided compiler flags and have them available at JIT time, we rely on embedding the command line via `-fembed-bitcode=marker`, which the bitcode writer had previously not implemented for SPIRV; we only allow it conditionally for AMDGCN flavoured SPIRV, and it is handled correctly by the Translator (it ends up as a string literal) Once the SPIRV BE is no longer experimental we'll switch to using that rather than the translator. There's some additional work that'll come via a separate PR around correctly piping through AMDGCN's implementation of `printf`; for now we merely handle its flags correctly. 
Added: Modified: clang/include/clang/Basic/Cuda.h clang/lib/Basic/Cuda.cpp clang/lib/Basic/Targets/NVPTX.cpp clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp clang/lib/CodeGen/CodeGenModule.cpp clang/lib/Driver/Driver.cpp clang/lib/Driver/ToolChains/Clang.cpp clang/lib/Driver/ToolChains/HIPAMD.cpp clang/lib/Driver/ToolChains/HIPAMD.h clang/test/Driver/cuda-arch-translation.cu clang/test/Frontend/embed-bitcode.ll clang/test/Misc/target-invalid-cpu-note.c llvm/lib/Bitcode/Writer/BitcodeWriter.cpp Removed: ################################################################################ diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h index 0d5e38e825aa7..01cfe286c491b 100644 --- a/clang/include/clang/Basic/Cuda.h +++ b/clang/include/clang/Basic/Cuda.h @@ -128,6 +128,7 @@ enum class CudaArch { GFX12_GENERIC, GFX1200, GFX1201, + AMDGCNSPIRV, Generic, // A processor model named 'generic' if the target backend defines a // public one. LAST, diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp index 1d96a929f95d8..af99c4d61021e 100644 --- a/clang/lib/Basic/Cuda.cpp +++ b/clang/lib/Basic/Cuda.cpp @@ -148,6 +148,7 @@ static const CudaArchToStringMap arch_names[] = { {CudaArch::GFX12_GENERIC, "gfx12-generic", "compute_amdgcn"}, GFX(1200), // gfx1200 GFX(1201), // gfx1201 + {CudaArch::AMDGCNSPIRV, "amdgcnspirv", "compute_amdgcn"}, {CudaArch::Generic, "generic", ""}, // clang-format on }; diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index ff7d2f1f92aa4..8e9006853db65 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -232,6 +232,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, case CudaArch::GFX12_GENERIC: case CudaArch::GFX1200: case CudaArch::GFX1201: + case CudaArch::AMDGCNSPIRV: case CudaArch::Generic: case CudaArch::LAST: break; diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index 
6e9a1bacd9bf5..6df34774334fa 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -3541,6 +3541,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective( case CudaArch::GFX12_GENERIC: case CudaArch::GFX1200: case CudaArch::GFX1201: + case CudaArch::AMDGCNSPIRV: case CudaArch::Generic: case CudaArch::UNUSED: case CudaArch::UNKNOWN: diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 76534475e88f7..652f519d82488 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -907,7 +907,8 @@ void CodeGenModule::Release() { if (Context.getTargetInfo().getTriple().isWasm()) EmitMainVoidAlias(); - if (getTriple().isAMDGPU()) { + if (getTriple().isAMDGPU() || + (getTriple().isSPIRV() && getTriple().getVendor() == llvm::Triple::AMD)) { // Emit amdhsa_code_object_version module flag, which is code object version // times 100. if (getTarget().getTargetOpts().CodeObjectVersion != diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 33ab7cc3f3968..6823f5424cef0 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -147,6 +147,14 @@ getNVIDIAOffloadTargetTriple(const Driver &D, const ArgList &Args, static std::optional<llvm::Triple> getHIPOffloadTargetTriple(const Driver &D, const ArgList &Args) { if (!Args.hasArg(options::OPT_offload_EQ)) { + auto OffloadArchs = Args.getAllArgValues(options::OPT_offload_arch_EQ); + if (llvm::find(OffloadArchs, "amdgcnspirv") != OffloadArchs.cend()) { + if (OffloadArchs.size() == 1) + return llvm::Triple("spirv64-amd-amdhsa"); + // Mixing specific & SPIR-V compilation is not supported for now. + D.Diag(diag::err_drv_only_one_offload_target_supported); + return std::nullopt; + } return llvm::Triple("amdgcn-amd-amdhsa"); // Default HIP triple. } auto TT = getOffloadTargetTriple(D, Args); @@ -3231,10 +3239,14 @@ class OffloadingActionBuilder final { // supported GPUs. 
sm_20 code should work correctly, if // suboptimally, on all newer GPUs. if (GpuArchList.empty()) { - if (ToolChains.front()->getTriple().isSPIRV()) - GpuArchList.push_back(CudaArch::Generic); - else + if (ToolChains.front()->getTriple().isSPIRV()) { + if (ToolChains.front()->getTriple().getVendor() == llvm::Triple::AMD) + GpuArchList.push_back(CudaArch::AMDGCNSPIRV); + else + GpuArchList.push_back(CudaArch::Generic); + } else { GpuArchList.push_back(DefaultCudaArch); + } } return Error; @@ -6501,9 +6513,11 @@ const ToolChain &Driver::getOffloadingDeviceToolChain( // things. switch (TargetDeviceOffloadKind) { case Action::OFK_HIP: { - if (Target.getArch() == llvm::Triple::amdgcn && - Target.getVendor() == llvm::Triple::AMD && - Target.getOS() == llvm::Triple::AMDHSA) + if (((Target.getArch() == llvm::Triple::amdgcn || + Target.getArch() == llvm::Triple::spirv64) && + Target.getVendor() == llvm::Triple::AMD && + Target.getOS() == llvm::Triple::AMDHSA) || + !Args.hasArgNoClaim(options::OPT_offload_EQ)) TC = std::make_unique<toolchains::HIPAMDToolChain>(*this, Target, HostTC, Args); else if (Target.getArch() == llvm::Triple::spirv64 && diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 2ce9e2f4bcfcd..c0f6bc0c2e45a 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4939,7 +4939,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Args.MakeArgString(NormalizedTriple)); if (JA.isDeviceOffloading(Action::OFK_HIP) && - getToolChain().getTriple().isAMDGPU()) { + (getToolChain().getTriple().isAMDGPU() || + (getToolChain().getTriple().isSPIRV() && + getToolChain().getTriple().getVendor() == llvm::Triple::AMD))) { // Device side compilation printf if (Args.getLastArg(options::OPT_mprintf_kind_EQ)) { CmdArgs.push_back(Args.MakeArgString( diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp index 
34236e8bcf949..c35b0febb262d 100644 --- a/clang/lib/Driver/ToolChains/HIPAMD.cpp +++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp @@ -10,6 +10,7 @@ #include "AMDGPU.h" #include "CommonArgs.h" #include "HIPUtility.h" +#include "SPIRV.h" #include "clang/Basic/Cuda.h" #include "clang/Basic/TargetID.h" #include "clang/Driver/Compilation.h" @@ -193,6 +194,33 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA, Lld, LldArgs, Inputs, Output)); } +// For SPIR-V the inputs for the job are device AMDGCN SPIR-V flavoured bitcode +// and the output is either a compiled SPIR-V binary or bitcode (-emit-llvm). It +// calls llvm-link and then the llvm-spirv translator. Once the SPIR-V BE will +// be promoted from experimental, we will switch to using that. TODO: consider +// if we want to run any targeted optimisations over IR here, over generic +// SPIR-V. +void AMDGCN::Linker::constructLinkAndEmitSpirvCommand( + Compilation &C, const JobAction &JA, const InputInfoList &Inputs, + const InputInfo &Output, const llvm::opt::ArgList &Args) const { + assert(!Inputs.empty() && "Must have at least one input."); + + constructLlvmLinkCommand(C, JA, Inputs, Output, Args); + + // Linked BC is now in Output + + // Emit SPIR-V binary. + llvm::opt::ArgStringList TrArgs{ + "--spirv-max-version=1.6", + "--spirv-ext=+all", + "--spirv-allow-extra-diexpressions", + "--spirv-allow-unknown-intrinsics", + "--spirv-lower-const-expr", + "--spirv-preserve-auxdata", + "--spirv-debug-info-version=nonsemantic-shader-200"}; + SPIRV::constructTranslateCommand(C, *this, JA, Output, Output, TrArgs); +} + // For amdgcn the inputs of the linker job are device bitcode and output is // either an object file or bitcode (-emit-llvm). It calls llvm-link, opt, // llc, then lld steps. 
@@ -214,6 +242,9 @@ void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA, if (JA.getType() == types::TY_LLVM_BC) return constructLlvmLinkCommand(C, JA, Inputs, Output, Args); + if (getToolChain().getTriple().isSPIRV()) + return constructLinkAndEmitSpirvCommand(C, JA, Inputs, Output, Args); + return constructLldCommand(C, JA, Inputs, Output, Args); } @@ -270,6 +301,13 @@ void HIPAMDToolChain::addClangTargetOptions( CC1Args.push_back("-fapply-global-visibility-to-externs"); } + // For SPIR-V we embed the command-line into the generated binary, in order to + // retrieve it at JIT time and be able to do target specific compilation with + // options that match the user-supplied ones. + if (getTriple().isSPIRV() && + !DriverArgs.hasArg(options::OPT_fembed_bitcode_marker)) + CC1Args.push_back("-fembed-bitcode=marker"); + for (auto BCFile : getDeviceLibs(DriverArgs)) { CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode" : "-mlink-bitcode-file"); @@ -303,7 +341,8 @@ HIPAMDToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, } Tool *HIPAMDToolChain::buildLinker() const { - assert(getTriple().getArch() == llvm::Triple::amdgcn); + assert(getTriple().getArch() == llvm::Triple::amdgcn || + getTriple().getArch() == llvm::Triple::spirv64); return new tools::AMDGCN::Linker(*this); } @@ -358,7 +397,9 @@ VersionTuple HIPAMDToolChain::computeMSVCVersion(const Driver *D, llvm::SmallVector<ToolChain::BitCodeLibraryInfo, 12> HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs) const { llvm::SmallVector<BitCodeLibraryInfo, 12> BCLibs; - if (DriverArgs.hasArg(options::OPT_nogpulib)) + if (DriverArgs.hasArg(options::OPT_nogpulib) || + (getTriple().getArch() == llvm::Triple::spirv64 && + getTriple().getVendor() == llvm::Triple::AMD)) return {}; ArgStringList LibraryPaths; diff --git a/clang/lib/Driver/ToolChains/HIPAMD.h b/clang/lib/Driver/ToolChains/HIPAMD.h index d81a9733014cc..c31894e22c5c8 100644 --- 
a/clang/lib/Driver/ToolChains/HIPAMD.h +++ b/clang/lib/Driver/ToolChains/HIPAMD.h @@ -40,6 +40,10 @@ class LLVM_LIBRARY_VISIBILITY Linker final : public Tool { const InputInfoList &Inputs, const InputInfo &Output, const llvm::opt::ArgList &Args) const; + void constructLinkAndEmitSpirvCommand(Compilation &C, const JobAction &JA, + const InputInfoList &Inputs, + const InputInfo &Output, + const llvm::opt::ArgList &Args) const; }; } // end namespace AMDGCN diff --git a/clang/test/Driver/cuda-arch-translation.cu b/clang/test/Driver/cuda-arch-translation.cu index ff97f2dbda6c5..e96191cc9d418 100644 --- a/clang/test/Driver/cuda-arch-translation.cu +++ b/clang/test/Driver/cuda-arch-translation.cu @@ -59,6 +59,8 @@ // RUN: | FileCheck -check-prefixes=HIP,GFX900 %s // RUN: %clang -x hip -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=gfx902 -nogpuinc -nogpulib %s 2>&1 \ // RUN: | FileCheck -check-prefixes=HIP,GFX902 %s +// RUN: %clang -x hip -### --target=x86_64-linux-gnu -c --cuda-gpu-arch=amdgcnspirv -nogpuinc -nogpulib %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=HIP,SPIRV %s // CUDA: ptxas // CUDA-SAME: -m64 @@ -95,3 +97,4 @@ // GFX810:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx810 // GFX900:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx900 // GFX902:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx902 +// SPIRV:-targets=host-x86_64-unknown-linux,hip-spirv64-amd-amdhsa--amdgcnspirv diff --git a/clang/test/Frontend/embed-bitcode.ll b/clang/test/Frontend/embed-bitcode.ll index 9b8632d04dd98..0959af48ad24a 100644 --- a/clang/test/Frontend/embed-bitcode.ll +++ b/clang/test/Frontend/embed-bitcode.ll @@ -10,6 +10,9 @@ ; RUN: %clang_cc1 -triple aarch64 -emit-llvm \ ; RUN: -fembed-bitcode=all -x ir %s -o - \ ; RUN: | FileCheck %s -check-prefix=CHECK-ELF +; RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm \ +; RUN: -fembed-bitcode=all -x ir %s -o - \ +; RUN: | FileCheck %s -check-prefix=CHECK-ELF ; check .bc input ; 
RUN: %clang_cc1 -triple thumbv7-apple-ios8.0.0 -emit-llvm-bc \ diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c index 1a9063ee5a257..700860378ed0c 100644 --- a/clang/test/Misc/target-invalid-cpu-note.c +++ b/clang/test/Misc/target-invalid-cpu-note.c @@ -29,7 +29,7 @@ // RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX // NVPTX: error: unknown target CPU 'not-a-cpu' -// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, sm_90a, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx9-generic, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx10-1-generic, gfx1010, gfx1011, gfx1012, gfx1013, gfx10-3-generic, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx11-generic, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1152, gfx12-generic, gfx1200, gfx1201{{$}} +// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, sm_90a, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx9-generic, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx10-1-generic, gfx1010, gfx1011, gfx1012, gfx1013, gfx10-3-generic, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx11-generic, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1152, gfx12-generic, gfx1200, gfx1201, amdgcnspirv{{$}} // RUN: not %clang_cc1 -triple r600--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix R600 // R600: error: unknown target CPU 'not-a-cpu' diff --git 
a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index ba16c0851e1fd..7a228fb6c08b9 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -5309,6 +5309,8 @@ static const char *getSectionNameForBitcode(const Triple &T) { llvm_unreachable("GOFF is not yet implemented"); break; case Triple::SPIRV: + if (T.getVendor() == Triple::AMD) + return ".llvmbc"; llvm_unreachable("SPIRV is not yet implemented"); break; case Triple::XCOFF: @@ -5334,6 +5336,8 @@ static const char *getSectionNameForCommandline(const Triple &T) { llvm_unreachable("GOFF is not yet implemented"); break; case Triple::SPIRV: + if (T.getVendor() == Triple::AMD) + return ".llvmcmd"; llvm_unreachable("SPIRV is not yet implemented"); break; case Triple::XCOFF: _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits