https://github.com/bader updated https://github.com/llvm/llvm-project/pull/201253
>From 9224b12691b10f57b3b1a342d520fa16605a0216 Mon Sep 17 00:00:00 2001
From: Alexey Bader <[email protected]>
Date: Wed, 3 Jun 2026 16:39:58 -0700
Subject: [PATCH] [clang][sycl][nvlink] Share static library linking in Frontend/Offloading

Move clang-nvlink-wrapper's archive member selection engine into a new
shared library in llvm/lib/Frontend/Offloading (ArchiveLinker.h/.cpp) and
use it from both clang-nvlink-wrapper and clang-sycl-linker, adding static
library (.a) and -l support to the SYCL linker.

The shared llvm::offloading::resolveArchiveMembers() API:
- Searches -L paths for -l library names (lib<name>.a or :<name>)
- Expands archives, honouring --whole-archive/--no-whole-archive
- Runs a symbol-driven fixed-point loop to extract only the archive
  members that resolve undefined symbols
- Returns the resolved MemoryBuffers and symbol table; the symbol table
  is consumed by clang-nvlink-wrapper's LTO resolution pass

clang-sycl-linker gains -l, --whole-archive/--no-whole-archive, and -u
options (added to SYCLLinkOpts.td). The existing --bc-library path is kept
as a parallel mechanism for now with a TODO to deprecate it.

Bug fixes included:
- Fix dangling StringRef UB: Args.getAllArgValues() returns a temporary
  vector<string>; retain it in ForcedUndefStorage so the StringRefs remain
  valid through the resolveArchiveMembers call (both tools).
- Fix assert crash in clang-sycl-linker when all positional inputs are
  non-existent: return a proper error instead of propagating an empty
  buffer vector to linkInputs.
Co-Authored-By: Claude --- .../OffloadTools/clang-sycl-linker/basic.ll | 4 - .../OffloadTools/clang-sycl-linker/triple.ll | 2 + .../tools/clang-nvlink-wrapper/CMakeLists.txt | 1 + .../ClangNVLinkWrapper.cpp | 284 +++--------------- .../clang-sycl-linker/ClangSYCLLinker.cpp | 110 +++++-- clang/tools/clang-sycl-linker/SYCLLinkOpts.td | 16 + .../llvm/Frontend/Offloading/ArchiveLinker.h | 115 +++++++ .../lib/Frontend/Offloading/ArchiveLinker.cpp | 268 +++++++++++++++++ llvm/lib/Frontend/Offloading/CMakeLists.txt | 1 + 9 files changed, 520 insertions(+), 281 deletions(-) create mode 100644 llvm/include/llvm/Frontend/Offloading/ArchiveLinker.h create mode 100644 llvm/lib/Frontend/Offloading/ArchiveLinker.cpp diff --git a/clang/test/OffloadTools/clang-sycl-linker/basic.ll b/clang/test/OffloadTools/clang-sycl-linker/basic.ll index bd65a35bd8384..33e6181ed3874 100644 --- a/clang/test/OffloadTools/clang-sycl-linker/basic.ll +++ b/clang/test/OffloadTools/clang-sycl-linker/basic.ll @@ -20,10 +20,6 @@ ; RUN: not clang-sycl-linker -o %t.out 2>&1 | FileCheck %s --check-prefix=NO-INPUT ; NO-INPUT: No input files provided ; -; Test non-existent input file -; RUN: not clang-sycl-linker %t-missing.bc -o %t.out 2>&1 | FileCheck %s --check-prefix=MISSING -; MISSING: Input file '{{.*}}-missing.bc' does not exist -; ; Test the dry run of a simple case to link two input files. ; Test that IMG_SPIRV image kind is set for non-AOT compilation. 
; RUN: clang-sycl-linker --dry-run -v --module-split-mode=none %t/input1.bc %t/input2.bc -o %t/spirv.out 2>&1 \ diff --git a/clang/test/OffloadTools/clang-sycl-linker/triple.ll b/clang/test/OffloadTools/clang-sycl-linker/triple.ll index 222930987ce16..022a43fb34db2 100644 --- a/clang/test/OffloadTools/clang-sycl-linker/triple.ll +++ b/clang/test/OffloadTools/clang-sycl-linker/triple.ll @@ -63,6 +63,8 @@ define spir_kernel void @kernel_c() #0 { attributes #0 = { "sycl-module-id"="TU3.cpp" } ;--- no-triple.ll +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" + define spir_kernel void @kernel_d() #0 { ret void } diff --git a/clang/tools/clang-nvlink-wrapper/CMakeLists.txt b/clang/tools/clang-nvlink-wrapper/CMakeLists.txt index 846fa952ba58d..8df5e4294755f 100644 --- a/clang/tools/clang-nvlink-wrapper/CMakeLists.txt +++ b/clang/tools/clang-nvlink-wrapper/CMakeLists.txt @@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS BitWriter Core BinaryFormat + FrontendOffloading MC Target TransformUtils diff --git a/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp b/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp index 70178568f76c6..c4db56b150d28 100644 --- a/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp +++ b/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp @@ -20,6 +20,7 @@ #include "llvm/BinaryFormat/Magic.h" #include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/CodeGen/CommandFlags.h" +#include "llvm/Frontend/Offloading/ArchiveLinker.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/LTO/LTO.h" #include "llvm/Object/Archive.h" @@ -206,47 +207,6 @@ Expected<std::string> findProgram(const ArgList &Args, StringRef Name, return *Path; } -std::optional<std::string> findFile(StringRef Dir, StringRef Root, - const Twine &Name) { - SmallString<128> Path; - if (Dir.starts_with("=")) - sys::path::append(Path, Root, Dir.substr(1), Name); - else - sys::path::append(Path, Dir, Name); - - if 
(sys::fs::exists(Path)) - return static_cast<std::string>(Path); - return std::nullopt; -} - -std::optional<std::string> -findFromSearchPaths(StringRef Name, StringRef Root, - ArrayRef<StringRef> SearchPaths) { - for (StringRef Dir : SearchPaths) - if (std::optional<std::string> File = findFile(Dir, Root, Name)) - return File; - return std::nullopt; -} - -std::optional<std::string> -searchLibraryBaseName(StringRef Name, StringRef Root, - ArrayRef<StringRef> SearchPaths) { - for (StringRef Dir : SearchPaths) - if (std::optional<std::string> File = - findFile(Dir, Root, "lib" + Name + ".a")) - return File; - return std::nullopt; -} - -/// Search for static libraries in the linker's library path given input like -/// `-lfoo` or `-l:libfoo.a`. -std::optional<std::string> searchLibrary(StringRef Input, StringRef Root, - ArrayRef<StringRef> SearchPaths) { - if (Input.starts_with(":")) - return findFromSearchPaths(Input.drop_front(), Root, SearchPaths); - return searchLibraryBaseName(Input, Root, SearchPaths); -} - void printCommands(ArrayRef<StringRef> CmdArgs) { if (CmdArgs.empty()) return; @@ -255,49 +215,6 @@ void printCommands(ArrayRef<StringRef> CmdArgs) { errs() << join(std::next(CmdArgs.begin()), CmdArgs.end(), " ") << "\n"; } -/// A minimum symbol interface that provides the necessary information to -/// extract archive members and resolve LTO symbols. 
-struct Symbol { - enum Flags { - None = 0, - Undefined = 1 << 0, - Weak = 1 << 1, - }; - - Symbol() : File(), Flags(None), UsedInRegularObj(false) {} - Symbol(Symbol::Flags Flags) : File(), Flags(Flags), UsedInRegularObj(true) {} - - Symbol(MemoryBufferRef File, const irsymtab::Reader::SymbolRef Sym) - : File(File), Flags(0), UsedInRegularObj(false) { - if (Sym.isUndefined()) - Flags |= Undefined; - if (Sym.isWeak()) - Flags |= Weak; - } - - Symbol(MemoryBufferRef File, const SymbolRef Sym) - : File(File), Flags(0), UsedInRegularObj(false) { - auto FlagsOrErr = Sym.getFlags(); - if (!FlagsOrErr) - reportError(FlagsOrErr.takeError()); - if (*FlagsOrErr & SymbolRef::SF_Undefined) - Flags |= Undefined; - if (*FlagsOrErr & SymbolRef::SF_Weak) - Flags |= Weak; - - auto NameOrErr = Sym.getName(); - if (!NameOrErr) - reportError(NameOrErr.takeError()); - } - - bool isWeak() const { return Flags & Weak; } - bool isUndefined() const { return Flags & Undefined; } - - MemoryBufferRef File; - uint32_t Flags; - bool UsedInRegularObj; -}; - Expected<StringRef> runPTXAs(StringRef File, const ArgList &Args) { SmallVector<StringRef, 1> SearchPaths; if (Arg *A = Args.getLastArg(OPT_cuda_path_EQ)) @@ -413,97 +330,10 @@ Expected<std::unique_ptr<lto::LTO>> createLTO(const ArgList &Args) { return std::make_unique<lto::LTO>(std::move(Conf), Backend, Partitions, Kind); } -Expected<bool> getSymbolsFromBitcode(MemoryBufferRef Buffer, - StringMap<Symbol> &SymTab, bool IsLazy) { - Expected<IRSymtabFile> IRSymtabOrErr = readIRSymtab(Buffer); - if (!IRSymtabOrErr) - return IRSymtabOrErr.takeError(); - bool Extracted = !IsLazy; - StringMap<Symbol> PendingSymbols; - for (unsigned I = 0; I != IRSymtabOrErr->Mods.size(); ++I) { - for (const auto &IRSym : IRSymtabOrErr->TheReader.module_symbols(I)) { - if (IRSym.isFormatSpecific() || !IRSym.isGlobal()) - continue; - - Symbol &OldSym = !SymTab.count(IRSym.getName()) && IsLazy - ? 
PendingSymbols[IRSym.getName()] - : SymTab[IRSym.getName()]; - Symbol Sym = Symbol(Buffer, IRSym); - if (OldSym.File.getBuffer().empty()) - OldSym = Sym; - - bool ResolvesReference = - !Sym.isUndefined() && - (OldSym.isUndefined() || (OldSym.isWeak() && !Sym.isWeak())) && - !(OldSym.isWeak() && OldSym.isUndefined() && IsLazy); - Extracted |= ResolvesReference; - - Sym.UsedInRegularObj = OldSym.UsedInRegularObj; - if (ResolvesReference) - OldSym = Sym; - } - } - if (Extracted) - for (const auto &[Name, Symbol] : PendingSymbols) - SymTab[Name] = Symbol; - return Extracted; -} - -Expected<bool> getSymbolsFromObject(ObjectFile &ObjFile, - StringMap<Symbol> &SymTab, bool IsLazy) { - bool Extracted = !IsLazy; - StringMap<Symbol> PendingSymbols; - for (SymbolRef ObjSym : ObjFile.symbols()) { - auto NameOrErr = ObjSym.getName(); - if (!NameOrErr) - return NameOrErr.takeError(); - - Symbol &OldSym = !SymTab.count(*NameOrErr) && IsLazy - ? PendingSymbols[*NameOrErr] - : SymTab[*NameOrErr]; - Symbol Sym = Symbol(ObjFile.getMemoryBufferRef(), ObjSym); - if (OldSym.File.getBuffer().empty()) - OldSym = Sym; - - bool ResolvesReference = OldSym.isUndefined() && !Sym.isUndefined() && - (!OldSym.isWeak() || !IsLazy); - Extracted |= ResolvesReference; - - if (ResolvesReference) - OldSym = Sym; - OldSym.UsedInRegularObj = true; - } - if (Extracted) - for (const auto &[Name, Symbol] : PendingSymbols) - SymTab[Name] = Symbol; - return Extracted; -} - -Expected<bool> getSymbols(MemoryBufferRef Buffer, StringMap<Symbol> &SymTab, - bool IsLazy) { - switch (identify_magic(Buffer.getBuffer())) { - case file_magic::bitcode: { - return getSymbolsFromBitcode(Buffer, SymTab, IsLazy); - } - case file_magic::elf_relocatable: { - Expected<std::unique_ptr<ObjectFile>> ObjFile = - ObjectFile::createObjectFile(Buffer); - if (!ObjFile) - return ObjFile.takeError(); - return getSymbolsFromObject(**ObjFile, SymTab, IsLazy); - } - default: - return createStringError("Unsupported file type"); - } -} - 
Expected<SmallVector<StringRef>> getInput(const ArgList &Args) { - SmallVector<StringRef> LibraryPaths; - for (const opt::Arg *Arg : Args.filtered(OPT_library_path)) - LibraryPaths.push_back(Arg->getValue()); - + // Build input descriptors for the archive resolver + SmallVector<offloading::InputDesc> InputDescs; bool WholeArchive = false; - SmallVector<std::pair<std::unique_ptr<MemoryBuffer>, bool>> InputFiles; for (const opt::Arg *Arg : Args.filtered( OPT_INPUT, OPT_library, OPT_whole_archive, OPT_no_whole_archive)) { if (Arg->getOption().matches(OPT_whole_archive) || @@ -512,84 +342,46 @@ Expected<SmallVector<StringRef>> getInput(const ArgList &Args) { continue; } - std::optional<std::string> Filename = - Arg->getOption().matches(OPT_library) - ? searchLibrary(Arg->getValue(), /*Root=*/"", LibraryPaths) - : std::string(Arg->getValue()); - - if (!Filename && Arg->getOption().matches(OPT_library)) - return createStringError("unable to find library -l%s", Arg->getValue()); + offloading::InputDesc Desc; + Desc.Value = Arg->getValue(); + Desc.Kind = Arg->getOption().matches(OPT_library) + ? 
offloading::InputDesc::Library + : offloading::InputDesc::File; + Desc.WholeArchive = WholeArchive; + InputDescs.push_back(Desc); + } - if (!Filename || !sys::fs::exists(*Filename) || - sys::fs::is_directory(*Filename)) - continue; + // Gather search paths and forced undefined symbols + SmallVector<StringRef> LibraryPaths; + for (const opt::Arg *Arg : Args.filtered(OPT_library_path)) + LibraryPaths.push_back(Arg->getValue()); - ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = - MemoryBuffer::getFileOrSTDIN(*Filename); - if (std::error_code EC = BufferOrErr.getError()) - return createFileError(*Filename, EC); - - MemoryBufferRef Buffer = **BufferOrErr; - switch (identify_magic(Buffer.getBuffer())) { - case file_magic::bitcode: - case file_magic::elf_relocatable: - InputFiles.emplace_back(std::move(*BufferOrErr), /*IsLazy=*/false); - break; - case file_magic::archive: { - Expected<std::unique_ptr<object::Archive>> LibFile = - object::Archive::create(Buffer); - if (!LibFile) - return LibFile.takeError(); - Error Err = Error::success(); - for (auto Child : (*LibFile)->children(Err)) { - auto ChildBufferOrErr = Child.getMemoryBufferRef(); - if (!ChildBufferOrErr) - return ChildBufferOrErr.takeError(); - std::unique_ptr<MemoryBuffer> ChildBuffer = - MemoryBuffer::getMemBufferCopy( - ChildBufferOrErr->getBuffer(), - ChildBufferOrErr->getBufferIdentifier()); - InputFiles.emplace_back(std::move(ChildBuffer), !WholeArchive); - } - if (Err) - return Err; - break; - } - default: - return createStringError("Unsupported file type"); - } - } + std::vector<std::string> ForcedUndefStorage = Args.getAllArgValues(OPT_u); + SmallVector<StringRef> ForcedUndefs(ForcedUndefStorage.begin(), + ForcedUndefStorage.end()); - bool Extracted = true; - StringMap<Symbol> SymTab; - for (auto &Sym : Args.getAllArgValues(OPT_u)) - SymTab[Sym] = Symbol(Symbol::Undefined); - SmallVector<std::unique_ptr<MemoryBuffer>> LinkerInput; - while (Extracted) { - Extracted = false; - for (auto &[Input, 
IsLazy] : InputFiles) { - if (!Input) - continue; - - if (hasFatBinary(Args, *Input)) { - LinkerInput.emplace_back(std::move(Input)); - continue; - } + // Build the Inputs structure + offloading::Inputs Inputs; + Inputs.Order = InputDescs; + Inputs.SearchPaths = LibraryPaths; + Inputs.ForcedUndefs = ForcedUndefs; + Inputs.Root = ""; - // Archive members only extract if they define needed symbols. We will - // re-scan all the inputs if any files were extracted for the link job. - Expected<bool> ExtractOrErr = getSymbols(*Input, SymTab, IsLazy); - if (!ExtractOrErr) - return ExtractOrErr.takeError(); + // Create the fat binary predicate + auto IsFatBinary = [&Args](MemoryBufferRef B) -> bool { + return hasFatBinary(Args, B); + }; - Extracted |= *ExtractOrErr; - if (!*ExtractOrErr) - continue; + // Resolve archive members + Expected<offloading::ResolvedInputs> ResolvedOrErr = + offloading::resolveArchiveMembers(Inputs, IsFatBinary); + if (!ResolvedOrErr) + return ResolvedOrErr.takeError(); - LinkerInput.emplace_back(std::move(Input)); - } - } - InputFiles.clear(); + offloading::ResolvedInputs &Resolved = *ResolvedOrErr; + SmallVector<std::unique_ptr<MemoryBuffer>> LinkerInput = + std::move(Resolved.Buffers); + StringMap<offloading::Symbol> &SymTab = Resolved.SymTab; // Extract any bitcode files to be passed to the LTO pipeline. SmallVector<std::unique_ptr<MemoryBuffer>> BitcodeFiles; @@ -616,7 +408,7 @@ Expected<SmallVector<StringRef>> getInput(const ArgList &Args) { size_t Idx = 0; for (auto &Sym : Symbols) { lto::SymbolResolution &Res = Resolutions[Idx++]; - Symbol ObjSym = SymTab[Sym.getName()]; + offloading::Symbol ObjSym = SymTab[Sym.getName()]; // We will use this as the prevailing symbol in LTO if it is not // undefined and it is from the file that contained the canonical // definition. 
diff --git a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp index e5e092c4737ec..ffd73a0fdaecf 100644 --- a/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp +++ b/clang/tools/clang-sycl-linker/ClangSYCLLinker.cpp @@ -18,11 +18,13 @@ #include "clang/Basic/OffloadArch.h" #include "clang/Basic/Version.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/BinaryFormat/Magic.h" #include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/CodeGen/CommandFlags.h" +#include "llvm/Frontend/Offloading/ArchiveLinker.h" #include "llvm/Frontend/Offloading/Utility.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/LLVMContext.h" @@ -187,26 +189,59 @@ static Error executeCommands(StringRef ExecutablePath, return Error::success(); } -static Expected<SmallVector<std::string>> getInput(const ArgList &Args) { - // Collect all input bitcode files to be passed to the linking stage. - SmallVector<std::string> BitcodeFiles; - auto Inputs = Args.filtered(OPT_INPUT); - if (Inputs.empty()) - return createStringError("No input files provided"); - for (const opt::Arg *Arg : Inputs) { - StringRef Filename = Arg->getValue(); - if (!sys::fs::exists(Filename) || sys::fs::is_directory(Filename)) - return createStringError("Input file '" + Filename + "' does not exist"); - file_magic Magic; - if (auto EC = identify_magic(Filename, Magic)) - return createStringError("Failed to open file " + Filename); - // TODO: Current use case involves LLVM IR bitcode files as input. - // This will be extended to support SPIR-V IR files. 
- if (Magic != file_magic::bitcode) - return createStringError("Unsupported file type for '" + Filename + "'"); - BitcodeFiles.push_back(std::string(Filename)); +static Expected<SmallVector<std::unique_ptr<MemoryBuffer>>> +getInput(const ArgList &Args) { + // Build input descriptors for the shared archive resolver + SmallVector<offloading::InputDesc> InputDescs; + bool WholeArchive = false; + for (const opt::Arg *Arg : Args.filtered( + OPT_INPUT, OPT_library, OPT_whole_archive, OPT_no_whole_archive)) { + if (Arg->getOption().matches(OPT_whole_archive) || + Arg->getOption().matches(OPT_no_whole_archive)) { + WholeArchive = Arg->getOption().matches(OPT_whole_archive); + continue; + } + + offloading::InputDesc Desc; + Desc.Value = Arg->getValue(); + Desc.Kind = Arg->getOption().matches(OPT_library) + ? offloading::InputDesc::Library + : offloading::InputDesc::File; + Desc.WholeArchive = WholeArchive; + InputDescs.push_back(Desc); } - return BitcodeFiles; + + if (InputDescs.empty()) + return createStringError(inconvertibleErrorCode(), + "No input files provided"); + + // Gather search paths and forced undefined symbols + SmallVector<StringRef> LibraryPaths; + for (const opt::Arg *Arg : Args.filtered(OPT_library_path)) + LibraryPaths.push_back(Arg->getValue()); + + std::vector<std::string> ForcedUndefStorage = Args.getAllArgValues(OPT_u); + SmallVector<StringRef> ForcedUndefs(ForcedUndefStorage.begin(), + ForcedUndefStorage.end()); + + // Build the Inputs structure + offloading::Inputs Inputs; + Inputs.Order = InputDescs; + Inputs.SearchPaths = LibraryPaths; + Inputs.ForcedUndefs = ForcedUndefs; + Inputs.Root = ""; + + // Resolve archive members (no fat binary predicate for SYCL) + Expected<offloading::ResolvedInputs> ResolvedOrErr = + offloading::resolveArchiveMembers(Inputs); + if (!ResolvedOrErr) + return ResolvedOrErr.takeError(); + + if (ResolvedOrErr->Buffers.empty()) + return createStringError(inconvertibleErrorCode(), + "No input files could be resolved"); + + 
return std::move(ResolvedOrErr->Buffers); } /// Handle cases where input file is a LLVM IR bitcode file. @@ -283,12 +318,15 @@ struct LinkResult { /// 3. Gather all library bitcode images. /// 4. Link all the images gathered in Step 3 with the output of Step 2 using /// linkInModule API. LinkOnlyNeeded flag is used. -static Expected<LinkResult> linkInputs(ArrayRef<std::string> InputFiles, - const ArgList &Args, LLVMContext &C) { +static Expected<LinkResult> +linkInputs(ArrayRef<std::unique_ptr<MemoryBuffer>> InputBuffers, + const ArgList &Args, LLVMContext &C) { llvm::TimeTraceScope TimeScope("Link code"); - assert(InputFiles.size() && "No inputs to link"); + assert(InputBuffers.size() && "No inputs to link"); + // TODO: Drop --bc-library in favor of the -l / .a archive path once it is + // established. // Get all library files. Expected<SmallVector<std::string>> BCLibFiles = getBCLibraryNames(Args); if (!BCLibFiles) @@ -301,7 +339,10 @@ static Expected<LinkResult> linkInputs(ArrayRef<std::string> InputFiles, return BitcodeOutput.takeError(); if (Verbose) { - std::string Inputs = llvm::join(InputFiles.begin(), InputFiles.end(), ", "); + std::string Inputs = llvm::join( + llvm::map_range(InputBuffers, + [](const auto &B) { return B->getBufferIdentifier(); }), + ", "); std::string LibInputs = llvm::join((*BCLibFiles).begin(), (*BCLibFiles).end(), ", "); errs() << formatv("link: inputs: {0} libfiles: {1} output: {2}\n", Inputs, @@ -314,8 +355,11 @@ static Expected<LinkResult> linkInputs(ArrayRef<std::string> InputFiles, auto LinkerOutput = std::make_unique<Module>("linker-output", C); Linker L(*LinkerOutput); - for (auto &File : InputFiles) { - auto ModOrErr = getBitcodeModule(File, C); + for (const auto &Buffer : InputBuffers) { + // Data is already in memory; use eager parse (unlike getBitcodeModule which + // stays lazy for --bc-library files where LinkOnlyNeeded skips most + // bodies). 
+ auto ModOrErr = parseBitcodeFile(Buffer->getMemBufferRef(), C); if (!ModOrErr) return ModOrErr.takeError(); @@ -323,20 +367,23 @@ static Expected<LinkResult> linkInputs(ArrayRef<std::string> InputFiles, if (!T.empty() && T != TargetTriple) { if (TargetTriple.empty()) { TargetTriple = T; - TripleSource = File; + TripleSource = Buffer->getBufferIdentifier(); } else { return createStringError( + inconvertibleErrorCode(), "conflicting target triples: '" + TargetTriple.str() + "' (from " + - TripleSource + ") vs '" + T.str() + "' (from " + File + ")"); + TripleSource + ") vs '" + T.str() + "' (from " + + Buffer->getBufferIdentifier() + ")"); } } if (L.linkInModule(std::move(*ModOrErr))) - return createStringError("Could not link IR"); + return createStringError(inconvertibleErrorCode(), "Could not link IR"); } if (TargetTriple.empty()) return createStringError( + inconvertibleErrorCode(), "Target triple must be specified or inferable from inputs"); // Link in library files. @@ -347,7 +394,7 @@ static Expected<LinkResult> linkInputs(ArrayRef<std::string> InputFiles, if ((*LibMod)->getTargetTriple() == TargetTriple) { unsigned Flags = Linker::Flags::LinkOnlyNeeded; if (L.linkInModule(std::move(*LibMod), Flags)) - return createStringError("Could not link IR"); + return createStringError(inconvertibleErrorCode(), "Could not link IR"); } } @@ -693,13 +740,14 @@ static bool canSkipModuleSplit(IRSplitMode Mode, const Module &M, /// 4. Optionally run AOT compilation when targeting an Intel HW arch. /// 5. Pack the resulting images into a single OffloadBinary written to the /// output file. -static Error runSYCLLink(ArrayRef<std::string> Files, const ArgList &Args) { +static Error runSYCLLink(ArrayRef<std::unique_ptr<MemoryBuffer>> Buffers, + const ArgList &Args) { llvm::TimeTraceScope TimeScope("SYCL linking"); LLVMContext C; // Link all input bitcode files and library files. 
- Expected<LinkResult> LinkedOrErr = linkInputs(Files, Args, C); + Expected<LinkResult> LinkedOrErr = linkInputs(Buffers, Args, C); if (!LinkedOrErr) return LinkedOrErr.takeError(); LinkResult &Result = *LinkedOrErr; diff --git a/clang/tools/clang-sycl-linker/SYCLLinkOpts.td b/clang/tools/clang-sycl-linker/SYCLLinkOpts.td index e00e63aa1767d..9d1a3347ac0fd 100644 --- a/clang/tools/clang-sycl-linker/SYCLLinkOpts.td +++ b/clang/tools/clang-sycl-linker/SYCLLinkOpts.td @@ -24,6 +24,22 @@ def library_path_S : Separate<["--", "-"], "library-path">, Flags<[HelpHidden]>, def library_path_EQ : Joined<["--", "-"], "library-path=">, Flags<[HelpHidden]>, Alias<library_path>; +def library : JoinedOrSeparate<["-"], "l">, MetaVarName<"<libname>">, + HelpText<"Search for library <libname>">; +def library_S : Separate<["--", "-"], "library">, Flags<[HelpHidden]>, + Alias<library>; +def library_EQ : Joined<["--", "-"], "library=">, Flags<[HelpHidden]>, + Alias<library>; + +def whole_archive : Flag<["--", "-"], "whole-archive">, + HelpText<"Include all archive members in the link">; +def no_whole_archive : Flag<["--", "-"], "no-whole-archive">, + HelpText<"Only include archive members that resolve undefined symbols (default)">; + +def u : JoinedOrSeparate<["-"], "u">, MetaVarName<"<symbol>">, + HelpText<"Force undefined symbol during linking">; +def undefined : JoinedOrSeparate<["--"], "undefined">, Alias<u>; + def bc_library : Separate<["--", "-"], "bc-library">, MetaVarName<"<name>">, HelpText<"Add LLVM bitcode library <name> (with extension) to the link. 
A " "relative <name> is resolved against -L paths; an absolute path is " diff --git a/llvm/include/llvm/Frontend/Offloading/ArchiveLinker.h b/llvm/include/llvm/Frontend/Offloading/ArchiveLinker.h new file mode 100644 index 0000000000000..8594b0bfb2ba0 --- /dev/null +++ b/llvm/include/llvm/Frontend/Offloading/ArchiveLinker.h @@ -0,0 +1,115 @@ +//===- ArchiveLinker.h - Archive member selection for offloading -*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares shared functionality for linking static libraries +// (archives) in offloading tools. It provides a symbol-driven fixed-point +// archive member selection algorithm used by both clang-nvlink-wrapper and +// clang-sycl-linker. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FRONTEND_OFFLOADING_ARCHIVELINKER_H +#define LLVM_FRONTEND_OFFLOADING_ARCHIVELINKER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/IRSymtab.h" +#include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MemoryBufferRef.h" +#include <functional> +#include <memory> + +namespace llvm { +class MemoryBuffer; + +namespace object { +class SymbolRef; +} // namespace object + +namespace offloading { + +/// A minimum symbol interface that provides the necessary information to +/// extract archive members and resolve LTO symbols. 
+struct Symbol { + enum Flags { + None = 0, + Undefined = 1 << 0, + Weak = 1 << 1, + }; + + Symbol() : File(), SymFlags(None), UsedInRegularObj(false) {} + Symbol(Symbol::Flags F) : File(), SymFlags(F), UsedInRegularObj(true) {} + + Symbol(MemoryBufferRef File, const irsymtab::Reader::SymbolRef Sym) + : File(File), SymFlags(0), UsedInRegularObj(false) { + if (Sym.isUndefined()) + SymFlags |= Undefined; + if (Sym.isWeak()) + SymFlags |= Weak; + } + + /// Create a Symbol from an object file symbol reference. + /// Returns an error if symbol flags cannot be retrieved. + static Expected<Symbol> createFromObject(MemoryBufferRef File, + const object::SymbolRef &Sym); + + bool isWeak() const { return SymFlags & Weak; } + bool isUndefined() const { return SymFlags & Undefined; } + + MemoryBufferRef File; + uint32_t SymFlags; + bool UsedInRegularObj; +}; + +/// Description of a single input (file or library). +struct InputDesc { + StringRef Value; // file path, or library name for -l (the value after -l) + enum KindTy { File, Library } Kind; + bool WholeArchive; // --whole-archive state in effect at this input +}; + +/// All inputs and search paths for archive member resolution. +struct Inputs { + ArrayRef<InputDesc> Order; // positional inputs + -l libraries in order + ArrayRef<StringRef> SearchPaths; // -L paths + ArrayRef<StringRef> ForcedUndefs; // -u symbols (may be empty) + StringRef Root; // sysroot for "=" prefixed paths ("" if none) +}; + +/// Result of archive member resolution. +struct ResolvedInputs { + SmallVector<std::unique_ptr<MemoryBuffer>> + Buffers; // members to link, in order + StringMap<Symbol> SymTab; // symbol table (for LTO resolution) +}; + +/// Resolve archive members from the given inputs using a symbol-driven +/// fixed-point algorithm. 
For each input: +/// - If it's a Library, search for lib<name>.a or :<name> in SearchPaths +/// - If it's a File, use the path directly +/// - Archives are expanded and members are lazily extracted based on symbol +/// references unless WholeArchive is true +/// - Non-archive inputs (bitcode, ELF objects) are always included +/// +/// Returns the buffers to link and the symbol table for LTO resolution. +/// +/// \param In The inputs to resolve +/// \param IsFatBinary Optional predicate to identify "fat binary" inputs that +/// should be passed through without symbol scanning (e.g., nvlink's +/// cubin detection). If null, all inputs are scanned normally. +Expected<ResolvedInputs> resolveArchiveMembers( + const Inputs &In, + function_ref<bool(MemoryBufferRef)> IsFatBinary = nullptr); + +} // namespace offloading +} // namespace llvm + +#endif // LLVM_FRONTEND_OFFLOADING_ARCHIVELINKER_H diff --git a/llvm/lib/Frontend/Offloading/ArchiveLinker.cpp b/llvm/lib/Frontend/Offloading/ArchiveLinker.cpp new file mode 100644 index 0000000000000..e861fbe0a9927 --- /dev/null +++ b/llvm/lib/Frontend/Offloading/ArchiveLinker.cpp @@ -0,0 +1,268 @@ +//===- ArchiveLinker.cpp - Archive member selection for offloading --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements shared functionality for linking static libraries +// (archives) in offloading tools. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Frontend/Offloading/ArchiveLinker.h" +#include "llvm/BinaryFormat/Magic.h" +#include "llvm/Object/Archive.h" +#include "llvm/Object/IRObjectFile.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" + +using namespace llvm; +using namespace llvm::object; + +namespace llvm { +namespace offloading { + +Expected<Symbol> Symbol::createFromObject(MemoryBufferRef File, + const SymbolRef &Sym) { + Symbol Result; + Result.File = File; + + auto FlagsOrErr = Sym.getFlags(); + if (!FlagsOrErr) + return FlagsOrErr.takeError(); + + if (*FlagsOrErr & SymbolRef::SF_Undefined) + Result.SymFlags |= Undefined; + if (*FlagsOrErr & SymbolRef::SF_Weak) + Result.SymFlags |= Weak; + + return Result; +} + +static std::optional<std::string> findFile(StringRef Dir, StringRef Root, + const Twine &Name) { + SmallString<128> Path; + if (Dir.starts_with("=")) + sys::path::append(Path, Root, Dir.substr(1), Name); + else + sys::path::append(Path, Dir, Name); + + if (sys::fs::exists(Path)) + return static_cast<std::string>(Path); + return std::nullopt; +} + +static std::optional<std::string> +findFromSearchPaths(StringRef Name, StringRef Root, + ArrayRef<StringRef> SearchPaths) { + for (StringRef Dir : SearchPaths) + if (std::optional<std::string> File = findFile(Dir, Root, Name)) + return File; + return std::nullopt; +} + +/// Search for static libraries in the linker's library path given input like +/// `-lfoo` or `-l:libfoo.a`. 
+static std::optional<std::string> +searchLibrary(StringRef Input, StringRef Root, + ArrayRef<StringRef> SearchPaths) { + if (Input.starts_with(":")) + return findFromSearchPaths(Input.drop_front(), Root, SearchPaths); + SmallString<128> LibName; + ("lib" + Input + ".a").toVector(LibName); + return findFromSearchPaths(LibName, Root, SearchPaths); +} + +static Expected<bool> getSymbolsFromBitcode(MemoryBufferRef Buffer, + StringMap<Symbol> &SymTab, + bool IsLazy) { + Expected<IRSymtabFile> IRSymtabOrErr = readIRSymtab(Buffer); + if (!IRSymtabOrErr) + return IRSymtabOrErr.takeError(); + bool Extracted = !IsLazy; + StringMap<Symbol> PendingSymbols; + for (unsigned I = 0; I != IRSymtabOrErr->Mods.size(); ++I) { + for (const auto &IRSym : IRSymtabOrErr->TheReader.module_symbols(I)) { + if (IRSym.isFormatSpecific() || !IRSym.isGlobal()) + continue; + + StringMap<Symbol> &Target = + (IsLazy && !SymTab.count(IRSym.getName())) ? PendingSymbols : SymTab; + Symbol &OldSym = Target[IRSym.getName()]; + Symbol Sym = Symbol(Buffer, IRSym); + if (OldSym.File.getBuffer().empty()) + OldSym = Sym; + + bool ResolvesReference = + !Sym.isUndefined() && + (OldSym.isUndefined() || (OldSym.isWeak() && !Sym.isWeak())) && + !(OldSym.isWeak() && OldSym.isUndefined() && IsLazy); + Extracted |= ResolvesReference; + + Sym.UsedInRegularObj = OldSym.UsedInRegularObj; + if (ResolvesReference) + OldSym = Sym; + } + } + if (Extracted) + for (const auto &[Name, Symbol] : PendingSymbols) + SymTab[Name] = Symbol; + return Extracted; +} + +static Expected<bool> getSymbolsFromObject(ObjectFile &ObjFile, + StringMap<Symbol> &SymTab, + bool IsLazy) { + bool Extracted = !IsLazy; + StringMap<Symbol> PendingSymbols; + for (SymbolRef ObjSym : ObjFile.symbols()) { + auto NameOrErr = ObjSym.getName(); + if (!NameOrErr) + return NameOrErr.takeError(); + + StringMap<Symbol> &Target = + (IsLazy && !SymTab.count(*NameOrErr)) ? 
PendingSymbols : SymTab; + Symbol &OldSym = Target[*NameOrErr]; + + auto SymOrErr = + Symbol::createFromObject(ObjFile.getMemoryBufferRef(), ObjSym); + if (!SymOrErr) + return SymOrErr.takeError(); + Symbol Sym = *SymOrErr; + + if (OldSym.File.getBuffer().empty()) + OldSym = Sym; + + bool ResolvesReference = OldSym.isUndefined() && !Sym.isUndefined() && + (!OldSym.isWeak() || !IsLazy); + Extracted |= ResolvesReference; + + if (ResolvesReference) + OldSym = Sym; + OldSym.UsedInRegularObj = true; + } + if (Extracted) + for (const auto &[Name, Symbol] : PendingSymbols) + SymTab[Name] = Symbol; + return Extracted; +} + +static Expected<bool> getSymbols(MemoryBufferRef Buffer, + StringMap<Symbol> &SymTab, bool IsLazy) { + switch (identify_magic(Buffer.getBuffer())) { + case file_magic::bitcode: { + return getSymbolsFromBitcode(Buffer, SymTab, IsLazy); + } + case file_magic::elf_relocatable: { + Expected<std::unique_ptr<ObjectFile>> ObjFile = + ObjectFile::createObjectFile(Buffer); + if (!ObjFile) + return ObjFile.takeError(); + return getSymbolsFromObject(**ObjFile, SymTab, IsLazy); + } + default: + return createStringError("Unsupported file type"); + } +} + +Expected<ResolvedInputs> +resolveArchiveMembers(const Inputs &In, + function_ref<bool(MemoryBufferRef)> IsFatBinary) { + ResolvedInputs Result; + SmallVector<std::pair<std::unique_ptr<MemoryBuffer>, bool>> InputFiles; + + // Process each input descriptor + for (const InputDesc &Desc : In.Order) { + std::optional<std::string> Filename; + + if (Desc.Kind == InputDesc::Library) { + Filename = searchLibrary(Desc.Value, In.Root, In.SearchPaths); + if (!Filename) + return createStringError("unable to find library -l%s", + Desc.Value.str().c_str()); + } else { + if (!sys::fs::exists(Desc.Value) || sys::fs::is_directory(Desc.Value)) + continue; + Filename = Desc.Value.str(); + } + + if (!Filename) + continue; + + auto BufferOrErr = + errorOrToExpected(MemoryBuffer::getFileOrSTDIN(*Filename)); + if (!BufferOrErr) + return 
createFileError(*Filename, BufferOrErr.takeError()); + + MemoryBufferRef Buffer = (*BufferOrErr)->getMemBufferRef(); + switch (identify_magic(Buffer.getBuffer())) { + case file_magic::bitcode: + case file_magic::elf_relocatable: + InputFiles.emplace_back(std::move(*BufferOrErr), /*IsLazy=*/false); + break; + case file_magic::archive: { + Expected<std::unique_ptr<object::Archive>> LibFile = + object::Archive::create(Buffer); + if (!LibFile) + return LibFile.takeError(); + Error Err = Error::success(); + for (auto Child : (*LibFile)->children(Err)) { + auto ChildBufferOrErr = Child.getMemoryBufferRef(); + if (!ChildBufferOrErr) + return ChildBufferOrErr.takeError(); + std::unique_ptr<MemoryBuffer> ChildBuffer = + MemoryBuffer::getMemBufferCopy( + ChildBufferOrErr->getBuffer(), + ChildBufferOrErr->getBufferIdentifier()); + InputFiles.emplace_back(std::move(ChildBuffer), !Desc.WholeArchive); + } + if (Err) + return Err; + break; + } + default: + return createStringError("Unsupported file type"); + } + } + + // Seed symbol table with forced undefined symbols + for (StringRef Sym : In.ForcedUndefs) + Result.SymTab[Sym] = Symbol(Symbol::Undefined); + + // Fixed-point loop to extract archive members + bool Extracted = true; + while (Extracted) { + Extracted = false; + for (auto &[Input, IsLazy] : InputFiles) { + if (!Input) + continue; + + // Check if this is a fat binary that should be passed through + if (IsFatBinary && IsFatBinary(*Input)) { + Result.Buffers.emplace_back(std::move(Input)); + continue; + } + + // Archive members only extract if they define needed symbols + Expected<bool> ExtractOrErr = getSymbols(*Input, Result.SymTab, IsLazy); + if (!ExtractOrErr) + return ExtractOrErr.takeError(); + + Extracted |= *ExtractOrErr; + if (!*ExtractOrErr) + continue; + + Result.Buffers.emplace_back(std::move(Input)); + } + } + + return Result; +} + +} // namespace offloading +} // namespace llvm diff --git a/llvm/lib/Frontend/Offloading/CMakeLists.txt 
b/llvm/lib/Frontend/Offloading/CMakeLists.txt index 9747dbde043da..82c49018b9bf3 100644 --- a/llvm/lib/Frontend/Offloading/CMakeLists.txt +++ b/llvm/lib/Frontend/Offloading/CMakeLists.txt @@ -1,4 +1,5 @@ add_llvm_component_library(LLVMFrontendOffloading + ArchiveLinker.cpp Utility.cpp OffloadWrapper.cpp PropertySet.cpp _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
