llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-driver Author: Matt Arsenault (arsenm) <details> <summary>Changes</summary> Previously there was a vector of toolchains, but a number of places assumed there was only a single toolchain. I'm also not sure how you were supposed to identify which toolchain to use from this array. Make this parallel to the stored GpuArches. For the fat binary cases, we still need to pick a toolchain so that still just picks the first one; it probably should use the most neutral available triple. This also doesn't feel like a complete fix. The various Actions all contain a reference to an OffloadingToolChain, which seems to frequently be missing and isn't set at construction time. --- Full diff: https://github.com/llvm/llvm-project/pull/190369.diff 1 Files Affected: - (modified) clang/lib/Driver/Driver.cpp (+22-18) ``````````diff diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 0686a89d42faf..f4cbc443aec36 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -3296,7 +3296,9 @@ class OffloadingActionBuilder final { /// Tool chains associated with this builder. The same programming /// model may have associated one or more tool chains. + /// There should be one entry for each TargetID. SmallVector<const ToolChain *, 2> ToolChains; + const ToolChain *FatBinaryToolChain = nullptr; /// The derived arguments associated with this builder. DerivedArgList &Args; @@ -3478,9 +3480,9 @@ class OffloadingActionBuilder final { llvm::sys::path::extension(FileName) == LibFileExt)) return ABRT_Inactive; - for (auto Arch : GpuArchList) { + for (auto [Arch, ToolChain] : llvm::zip(GpuArchList, ToolChains)) { CudaDeviceActions.push_back(UA); - UA->registerDependentActionInfo(ToolChains[0], Arch, + UA->registerDependentActionInfo(ToolChain, Arch, AssociatedOffloadKind); } IsActive = true; @@ -3492,15 +3494,16 @@ class OffloadingActionBuilder final { void appendTopLevelActions(ActionList &AL) override { // Utility to append actions to the top level list. - auto AddTopLevel = [&](Action *A, TargetID TargetID) { + auto AddTopLevel = [&](Action *A, TargetID TargetID, + const ToolChain *TC) { OffloadAction::DeviceDependences Dep; - Dep.add(*A, *ToolChains.front(), TargetID, AssociatedOffloadKind); + Dep.add(*A, *TC, TargetID, AssociatedOffloadKind); AL.push_back(C.MakeAction<OffloadAction>(Dep, A->getType())); }; // If we have a fat binary, add it to the list. if (CudaFatBinary) { - AddTopLevel(CudaFatBinary, OffloadArch::Unused); + AddTopLevel(CudaFatBinary, OffloadArch::Unused, FatBinaryToolChain); CudaDeviceActions.clear(); CudaFatBinary = nullptr; return; @@ -3514,10 +3517,10 @@ class OffloadingActionBuilder final { // architecture. assert(CudaDeviceActions.size() == GpuArchList.size() && "Expecting one action per GPU architecture."); - assert(ToolChains.size() == 1 && - "Expecting to have a single CUDA toolchain."); + assert(ToolChains.size() == GpuArchList.size() && + "Expecting to have a toolchain per GPU architecture"); for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) - AddTopLevel(CudaDeviceActions[I], GpuArchList[I]); + AddTopLevel(CudaDeviceActions[I], GpuArchList[I], ToolChains[I]); CudaDeviceActions.clear(); } @@ -3550,20 +3553,21 @@ class OffloadingActionBuilder final { return true; } - std::set<StringRef> GpuArchs; + std::set<std::pair<StringRef, const ToolChain *>> GpuArchs; for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_HIP}) { for (auto &I : llvm::make_range(C.getOffloadToolChains(Kind))) { - ToolChains.push_back(I.second); - for (auto Arch : C.getDriver().getOffloadArchs(C, C.getArgs(), Kind, *I.second)) - GpuArchs.insert(Arch); + GpuArchs.insert({Arch, I.second}); } } - for (auto Arch : GpuArchs) + for (auto [Arch, TC] : GpuArchs) { GpuArchList.push_back(Arch.data()); + ToolChains.push_back(TC); + } + FatBinaryToolChain = ToolChains.front(); CompileHostOnly = C.getDriver().offloadHostOnly(); EmitLLVM = Args.getLastArg(options::OPT_emit_llvm); EmitAsm = Args.getLastArg(options::OPT_S); @@ -3647,7 +3651,7 @@ class OffloadingActionBuilder final { for (auto &A : {AssembleAction, BackendAction}) { OffloadAction::DeviceDependences DDep; - DDep.add(*A, *ToolChains.front(), GpuArchList[I], Action::OFK_Cuda); + DDep.add(*A, *ToolChains[I], GpuArchList[I], Action::OFK_Cuda); DeviceActions.push_back( C.MakeAction<OffloadAction>(DDep, A->getType())); } @@ -3822,7 +3826,7 @@ class OffloadingActionBuilder final { // device arch of the next action being propagated to the above link // action. OffloadAction::DeviceDependences DDep; - DDep.add(*CudaDeviceActions[I], *ToolChains.front(), GpuArchList[I], + DDep.add(*CudaDeviceActions[I], *ToolChains[I], GpuArchList[I], AssociatedOffloadKind); CudaDeviceActions[I] = C.MakeAction<OffloadAction>( DDep, CudaDeviceActions[I]->getType()); @@ -3877,7 +3881,7 @@ class OffloadingActionBuilder final { *BundleOutput) { for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) { OffloadAction::DeviceDependences DDep; - DDep.add(*CudaDeviceActions[I], *ToolChains.front(), GpuArchList[I], + DDep.add(*CudaDeviceActions[I], *ToolChains[I], GpuArchList[I], AssociatedOffloadKind); CudaDeviceActions[I] = C.MakeAction<OffloadAction>( DDep, CudaDeviceActions[I]->getType()); @@ -3915,8 +3919,8 @@ class OffloadingActionBuilder final { // Linking all inputs for the current GPU arch. // LI contains all the inputs for the linker. OffloadAction::DeviceDependences DeviceLinkDeps; - DeviceLinkDeps.add(*DeviceLinkAction, *ToolChains[0], - GpuArchList[I], AssociatedOffloadKind); + DeviceLinkDeps.add(*DeviceLinkAction, *ToolChains[I], GpuArchList[I], + AssociatedOffloadKind); Actions.push_back(C.MakeAction<OffloadAction>( DeviceLinkDeps, DeviceLinkAction->getType())); ++I; `````````` </details> https://github.com/llvm/llvm-project/pull/190369 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
