https://github.com/c-rhodes updated https://github.com/llvm/llvm-project/pull/178859
>From 279f407232506ce21f60f839be8f21c73e0fd779 Mon Sep 17 00:00:00 2001 From: Rose Hudson <[email protected]> Date: Wed, 28 Jan 2026 17:45:58 +0000 Subject: [PATCH] [DTLTO] support distributing bitcode from FatLTO objects (#176928) We already have code to extract bitcode files from archives so they can be distributed. Extend this code to extract bitcode from FatLTO objects too, which otherwise cannot be used with DTLTO. (cherry picked from commit e45ea95dbe236e233ad978067688789e7478541a) --- .../dtlto/fat-lto-objects.test | 55 +++++++++++++++++++ lld/ELF/Driver.cpp | 6 +- lld/test/ELF/dtlto/timetrace.test | 4 +- llvm/include/llvm/LTO/LTO.h | 21 +++++-- llvm/lib/DTLTO/DTLTO.cpp | 30 +++++----- 5 files changed, 93 insertions(+), 23 deletions(-) create mode 100644 cross-project-tests/dtlto/fat-lto-objects.test diff --git a/cross-project-tests/dtlto/fat-lto-objects.test b/cross-project-tests/dtlto/fat-lto-objects.test new file mode 100644 index 0000000000000..22e3eed43b4e2 --- /dev/null +++ b/cross-project-tests/dtlto/fat-lto-objects.test @@ -0,0 +1,55 @@ +REQUIRES: ld.lld,llvm-ar + +# Test that a DTLTO link succeeds and outputs the expected set of files +# correctly when FatLTO objects are present. +RUN: rm -rf %t && split-file %s %t && cd %t + +# Compile bitcode. -O2 is required for cross-module importing. +RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -ffat-lto-objects -c \ +RUN: foo.c boo.c start.c + +# We want to test FatLTO objects when included in archives. +RUN: llvm-ar rcs foo.a foo.o +RUN: llvm-ar rcsT boo.a boo.o + +# Build with DTLTO. +RUN: %clang --target=x86_64-linux-gnu -flto=thin -ffat-lto-objects \ +RUN: -fuse-ld=lld -nostdlib foo.a boo.a start.o -Wl,--save-temps \ +RUN: -fthinlto-distributor=%python \ +RUN: -Xthinlto-distributor=%llvm_src_root/utils/dtlto/local.py + +# Check that the required output files have been created. +RUN: ls | FileCheck %s + +# thin archive member: <archive>(<member> at <offset>).<task>.<pid>.<task>.<pid>.native.o +CHECK-DAG: {{^}}boo.a(boo.o at [[#BOO_OFFSET:]]).3.[[#%X,HEXPID:]].3.[[#PID:]].native.o{{$}} +# archive member: <archive>(<member> at <offset>).<task>.<pid>.<task>.<pid>.native.o +CHECK-DAG: {{^}}foo.a(foo.o at [[#FOO_OFFSET:]]).2.[[#%X,HEXPID]].2.[[#PID]].native.o{{$}} +# FatLTO object: <file>.<task>.<pid>.<task>.<pid>.native.o. +CHECK-DAG: {{^}}start.o.1.[[#%X,HEXPID]].1.[[#PID]].native.o{{$}} + +# Check that all objects are named in all of the index files. +# We expect this to happen because each object references symbols from the +# others. +RUN: llvm-dis *.1.*.thinlto.bc -o - | \ +RUN: FileCheck %s --check-prefixes=OBJECTS +RUN: llvm-dis *.2.*.thinlto.bc -o - | \ +RUN: FileCheck %s --check-prefixes=OBJECTS +RUN: llvm-dis *.3.*.thinlto.bc -o - | \ +RUN: FileCheck %s --check-prefixes=OBJECTS + +OBJECTS-DAG: foo.o +OBJECTS-DAG: boo.o +OBJECTS-DAG: start.o + +#--- foo.c +extern int boo(int), _start(int); +__attribute__((retain)) int foo(int x) { return x + boo(x) + _start(x); } + +#--- boo.c +extern int foo(int), _start(int); +__attribute__((retain)) int boo(int x) { return x + foo(x) + _start(x); } + +#--- start.c +extern int foo(int), boo(int); +__attribute__((retain)) int _start(int x) { return x + foo(x) + boo(x); } diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 8647752be31fe..9944cf2e73700 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -236,8 +236,10 @@ bool LinkerDriver::tryAddFatLTOFile(MemoryBufferRef mb, StringRef archiveName, IRObjectFile::findBitcodeInMemBuffer(mb); if (errorToBool(fatLTOData.takeError())) return false; - files.push_back(std::make_unique<BitcodeFile>(ctx, *fatLTOData, archiveName, - offsetInArchive, lazy)); + auto file = std::make_unique<BitcodeFile>(ctx, *fatLTOData, archiveName, + offsetInArchive, lazy); + file->obj->fatLTOObject(true); + files.push_back(std::move(file)); return true; } diff --git a/lld/test/ELF/dtlto/timetrace.test b/lld/test/ELF/dtlto/timetrace.test index 639ad36f8019f..4567b0a1d4b02 100644 --- a/lld/test/ELF/dtlto/timetrace.test +++ b/lld/test/ELF/dtlto/timetrace.test @@ -33,13 +33,13 @@ RUN: %python filter_order_and_pprint.py %t.json | FileCheck %s CHECK: "name": "Add input for DTLTO" CHECK: "name": "Add input for DTLTO" CHECK: "name": "Remove temporary inputs for DTLTO" -CHECK: "name": "Save input archive member for DTLTO" +CHECK: "name": "Serialize bitcode input for DTLTO" CHECK-SAME: "detail": "t1.a(t1.bc at [[#ARCHIVE_OFFSET:]]).1.[[PID:[A-F0-9]+]].o" CHECK: "name": "Total Add input for DTLTO" CHECK-SAME: "count": 2, CHECK: "name": "Total Remove temporary inputs for DTLTO" CHECK-SAME: "count": 1, -CHECK: "name": "Total Save input archive member for DTLTO" +CHECK: "name": "Total Serialize bitcode input for DTLTO" CHECK-SAME: "count": 1, #--- t1.ll diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h index cba5cf7eb9e62..9846d84e02383 100644 --- a/llvm/include/llvm/LTO/LTO.h +++ b/llvm/include/llvm/LTO/LTO.h @@ -131,7 +131,12 @@ class InputFile { std::vector<std::pair<StringRef, Comdat::SelectionKind>> ComdatTable; MemoryBufferRef MbRef; - bool IsMemberOfArchive = false; + bool IsFatLTOObject = false; + // For distributed compilation, each input must exist as an individual bitcode + // file on disk and be identified by its ModuleID. Archive members and FatLTO + // objects violate this. So, in these cases we flag that the bitcode must be + // written out to a new standalone file. + bool SerializeForDistribution = false; bool IsThinLTO = false; StringRef ArchivePath; StringRef MemberName; @@ -198,10 +203,16 @@ class InputFile { LLVM_ABI BitcodeModule &getPrimaryBitcodeModule(); // Returns the memory buffer reference for this input file. MemoryBufferRef getFileBuffer() const { return MbRef; } - // Returns true if this input file is a member of an archive. - bool isMemberOfArchive() const { return IsMemberOfArchive; } - // Mark this input file as a member of archive. - void memberOfArchive(bool MA) { IsMemberOfArchive = MA; } + // Returns true if this input should be serialized to disk for distribution. + // See the comment on SerializeForDistribution for details. + bool getSerializeForDistribution() const { return SerializeForDistribution; } + // Mark whether this input should be serialized to disk for distribution. + // See the comment on SerializeForDistribution for details. + void setSerializeForDistribution(bool SFD) { SerializeForDistribution = SFD; } + // Returns true if this bitcode came from a FatLTO object. + bool isFatLTOObject() const { return IsFatLTOObject; } + // Mark this bitcode as coming from a FatLTO object. + void fatLTOObject(bool FO) { IsFatLTOObject = FO; } // Returns true if bitcode is ThinLTO. bool isThinLTO() const { return IsThinLTO; } diff --git a/llvm/lib/DTLTO/DTLTO.cpp b/llvm/lib/DTLTO/DTLTO.cpp index 4d8f8ba0fc4ac..4a1107e76e47b 100644 --- a/llvm/lib/DTLTO/DTLTO.cpp +++ b/llvm/lib/DTLTO/DTLTO.cpp @@ -21,7 +21,6 @@ #include "llvm/LTO/LTO.h" #include "llvm/Object/Archive.h" #include "llvm/Support/FileSystem.h" -#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MemoryBufferRef.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" @@ -29,7 +28,6 @@ #include "llvm/Support/TimeProfiler.h" #include "llvm/Support/raw_ostream.h" -#include <iostream> #include <string> using namespace llvm; @@ -135,25 +133,29 @@ lto::DTLTO::addInput(std::unique_ptr<lto::InputFile> InputPtr) { StringRef ModuleId = Input->getName(); StringRef ArchivePath = Input->getArchivePath(); - // Only process archive members. - if (ArchivePath.empty()) + // In most cases, the module ID already points to an individual bitcode file + // on disk, so no further preparation for distribution is required. + if (ArchivePath.empty() && !Input->isFatLTOObject()) return Input; SmallString<64> NewModuleId; BitcodeModule &BM = Input->getPrimaryBitcodeModule(); - // Check if the archive is a thin archive. - Expected<bool> IsThin = isThinArchive(ArchivePath); - if (!IsThin) - return IsThin.takeError(); + // For a member of a thin archive that is not a FatLTO object, there is an + // existing file on disk that can be used, so we can avoid having to + // materialize. + Expected<bool> UseThinMember = + Input->isFatLTOObject() ? false : isThinArchive(ArchivePath); + if (!UseThinMember) + return UseThinMember.takeError(); - if (*IsThin) { + if (*UseThinMember) { // For thin archives, use the path to the actual file. NewModuleId = computeThinArchiveMemberPath(ArchivePath, Input->getMemberName()); } else { - // For regular archives, generate a unique name. - Input->memberOfArchive(true); + // For regular archives and FatLTO objects, generate a unique name. + Input->setSerializeForDistribution(true); // Create unique identifier using process ID and sequence number. std::string PID = utohexstr(sys::Process::getProcessId()); @@ -175,8 +177,8 @@ lto::DTLTO::addInput(std::unique_ptr<lto::InputFile> InputPtr) { // previously terminated linker process and can be safely overwritten. Error lto::DTLTO::saveInputArchiveMember(lto::InputFile *Input) { StringRef ModuleId = Input->getName(); - if (Input->isMemberOfArchive()) { - TimeTraceScope TimeScope("Save input archive member for DTLTO", ModuleId); + if (Input->getSerializeForDistribution()) { + TimeTraceScope TimeScope("Serialize bitcode input for DTLTO", ModuleId); // Cleanup this file on abnormal process exit. if (!SaveTemps) llvm::sys::RemoveFileOnSignal(ModuleId); @@ -216,7 +218,7 @@ void lto::DTLTO::cleanup() { if (!SaveTemps) { TimeTraceScope TimeScope("Remove temporary inputs for DTLTO"); for (auto &Input : InputFiles) { - if (!Input->isMemberOfArchive()) + if (!Input->getSerializeForDistribution()) continue; std::error_code EC = sys::fs::remove(Input->getName(), /*IgnoreNonExisting=*/true); _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
