rocallahan created this revision.
rocallahan added a reviewer: ruiu.
rocallahan added a project: lld.
Herald added subscribers: llvm-commits, MaskRay, JDevlieghere, arichardson, 
emaste.
Herald added a reviewer: espindola.

Rust projects tend to link in all object files from all dependent libraries and 
rely on --gc-sections to strip unused code and data. Unfortunately, 
--gc-sections doesn't currently strip any debuginfo associated with GC'ed 
sections, so lld links in the full debuginfo from all dependencies even if 
almost all of that code has been discarded. See 
https://github.com/rust-lang/rust/issues/56068 for some details.

Properly stripping debuginfo for discarded sections would be difficult, but a 
simple approach that helps significantly is to mark an object file's debuginfo 
sections as live only if that file has at least one live code/data section. 
This patch does that. In the (contrived but not totally artificial) Rust 
testcase linked above, it reduces the final binary size from 41MB to 6.1MB.


Repository:
  rLLD LLVM Linker

https://reviews.llvm.org/D54747

Files:
  lld/ELF/Driver.cpp
  lld/ELF/InputFiles.h
  lld/ELF/InputSection.cpp
  lld/ELF/InputSection.h
  lld/ELF/MarkLive.cpp
  lld/test/ELF/linkerscript/comdat-gc.s
  lld/test/ELF/linkerscript/debuginfo-gc.s

Index: lld/test/ELF/linkerscript/debuginfo-gc.s
===================================================================
--- /dev/null
+++ lld/test/ELF/linkerscript/debuginfo-gc.s
@@ -0,0 +1,14 @@
+# REQUIRES: x86
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %s -o %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-linux %p/Inputs/comdat-gc.s -o %t1
+# RUN: echo "SECTIONS { .text : { *(.text*) } }" > %t.script
+# RUN: ld.lld --gc-sections --script %t.script %t %t1 -o %t2
+# RUN: llvm-readobj -sections -symbols %t2 | FileCheck %s
+
+# CHECK-NOT: Name: .debug_line
+
+.file 1 "test/ELF/linkerscript/comdat_gc.s"
+.section  .text._Z3fooIiEvv,"axG",@progbits,_Z3fooIiEvv,comdat
+.loc 1 14
+  ret
Index: lld/test/ELF/linkerscript/comdat-gc.s
===================================================================
--- lld/test/ELF/linkerscript/comdat-gc.s
+++ lld/test/ELF/linkerscript/comdat-gc.s
@@ -8,6 +8,9 @@
 
 # GC1:     Name: .debug_line
 
+# Add a .ctors section so that not all of the debuginfo is GCed
+.section  .ctors,"ax",@progbits
+
 .file 1 "test/ELF/linkerscript/comdat_gc.s"
 .section  .text._Z3fooIiEvv,"axG",@progbits,_Z3fooIiEvv,comdat
 .loc 1 14
Index: lld/ELF/MarkLive.cpp
===================================================================
--- lld/ELF/MarkLive.cpp
+++ lld/ELF/MarkLive.cpp
@@ -119,9 +119,9 @@
 // the gc pass. With that we would be able to also gc some sections holding
 // LSDAs and personality functions if we found that they were unused.
 template <class ELFT, class RelTy>
-static void
-scanEhFrameSection(EhInputSection &EH, ArrayRef<RelTy> Rels,
-                   llvm::function_ref<void(InputSectionBase *, uint64_t)> Fn) {
+static void scanEhFrameSection(
+    EhInputSection &EH, ArrayRef<RelTy> Rels,
+    llvm::function_ref<void(InputSectionBase *, uint64_t, bool)> Fn) {
   const endianness E = ELFT::TargetEndianness;
 
   for (unsigned I = 0, N = EH.Pieces.size(); I < N; ++I) {
@@ -132,7 +132,10 @@
     if (read32<E>(Piece.data().data() + 4) == 0) {
       // This is a CIE, we only need to worry about the first relocation. It is
       // known to point to the personality function.
-      resolveReloc<ELFT>(EH, Rels[FirstRelI], Fn);
+      resolveReloc<ELFT>(EH, Rels[FirstRelI],
+                         [&](InputSectionBase *Sec, uint64_t Offset) {
+                           Fn(Sec, Offset, false);
+                         });
       continue;
     }
     // This is a FDE. The relocations point to the described function or to
@@ -147,16 +150,16 @@
                          [&](InputSectionBase *Sec, uint64_t Offset) {
                            if (Sec && Sec != &InputSection::Discarded &&
                                !(Sec->Flags & SHF_EXECINSTR))
-                             Fn(Sec, 0);
+                             Fn(Sec, 0, true);
                          });
     }
   }
 }
 
 template <class ELFT>
-static void
-scanEhFrameSection(EhInputSection &EH,
-                   llvm::function_ref<void(InputSectionBase *, uint64_t)> Fn) {
+static void scanEhFrameSection(
+    EhInputSection &EH,
+    llvm::function_ref<void(InputSectionBase *, uint64_t, bool)> Fn) {
   if (!EH.NumRelocations)
     return;
 
@@ -184,41 +187,60 @@
   }
 }
 
+// Marks Sec live; if it is an ObjFile's InputSection, flags the file as well.
+template <class ELFT> static void setSectionLive(InputSectionBase *Sec) {
+  Sec->Live = true;
+  if (Sec->kind() != SectionBase::Kind::Regular &&
+      Sec->kind() != SectionBase::Kind::Merge)
+    return;
+  if (!Sec->File || !ObjFile<ELFT>::classof(Sec->File))
+    return;
+  if (isa<InputSection>(Sec))
+    Sec->getFile<ELFT>()->HasLiveCodeOrData = true;
+}
+
 // This is the main function of the garbage collector.
 // Starting from GC-root sections, this function visits all reachable
 // sections to set their "Live" bits.
 template <class ELFT> static void doGcSections() {
   SmallVector<InputSection *, 256> Q;
   CNamedSections.clear();
 
-  auto Enqueue = [&](InputSectionBase *Sec, uint64_t Offset) {
+  auto EnqueueMaybeLDSA = [&](InputSectionBase *Sec, uint64_t Offset,
+                              bool IsLSDA) {
     // Skip over discarded sections. This in theory shouldn't happen, because
     // the ELF spec doesn't allow a relocation to point to a deduplicated
     // COMDAT section directly. Unfortunately this happens in practice (e.g.
     // .eh_frame) so we need to add a check.
     if (Sec == &InputSection::Discarded)
       return;
 
-
     // Usually, a whole section is marked as live or dead, but in mergeable
     // (splittable) sections, each piece of data has independent liveness bit.
     // So we explicitly tell it which offset is in use.
     if (auto *MS = dyn_cast<MergeInputSection>(Sec))
       MS->getSectionPiece(Offset)->Live = true;
 
     if (Sec->Live)
       return;
-    Sec->Live = true;
+    // LSDA does not count as "live code or data" in the object file.
+    if (IsLSDA)
+      Sec->Live = true;
+    else
+      setSectionLive<ELFT>(Sec);
 
     // Add input section to the queue.
     if (InputSection *S = dyn_cast<InputSection>(Sec))
       Q.push_back(S);
   };
+  auto EnqueueNotLDSA = [&](InputSectionBase *Sec, uint64_t Offset) {
+    EnqueueMaybeLDSA(Sec, Offset, false);
+  };
 
   auto MarkSymbol = [&](Symbol *Sym) {
     if (auto *D = dyn_cast_or_null<Defined>(Sym))
       if (auto *IS = dyn_cast_or_null<InputSectionBase>(D->Section))
-        Enqueue(IS, D->Value);
+        EnqueueNotLDSA(IS, D->Value);
   };
 
   // Add GC root symbols.
@@ -245,22 +267,22 @@
     // referenced by .eh_frame sections, so we scan them for that here.
     if (auto *EH = dyn_cast<EhInputSection>(Sec)) {
       EH->Live = true;
-      scanEhFrameSection<ELFT>(*EH, Enqueue);
+      scanEhFrameSection<ELFT>(*EH, EnqueueMaybeLDSA);
     }
 
     if (Sec->Flags & SHF_LINK_ORDER)
       continue;
     if (isReserved<ELFT>(Sec) || Script->shouldKeep(Sec))
-      Enqueue(Sec, 0);
+      EnqueueNotLDSA(Sec, 0);
     else if (isValidCIdentifier(Sec->Name)) {
       CNamedSections[Saver.save("__start_" + Sec->Name)].push_back(Sec);
       CNamedSections[Saver.save("__stop_" + Sec->Name)].push_back(Sec);
     }
   }
 
   // Mark all reachable sections.
   while (!Q.empty())
-    forEachSuccessor<ELFT>(*Q.pop_back_val(), Enqueue);
+    forEachSuccessor<ELFT>(*Q.pop_back_val(), EnqueueNotLDSA);
 }
 
 // Before calling this function, Live bits are off for all
@@ -270,7 +292,7 @@
   // If -gc-sections is missing, no sections are removed.
   if (!Config->GcSections) {
     for (InputSectionBase *Sec : InputSections)
-      Sec->Live = true;
+      setSectionLive<ELFT>(Sec);
     return;
   }
 
@@ -293,17 +315,29 @@
   // or -emit-reloc were given. And they are subject of garbage
   // collection because, if we remove a text section, we also
   // remove its relocation section.
+  bool AnyDebugSections = false;
   for (InputSectionBase *Sec : InputSections) {
+    if (Sec->Debug) {
+      AnyDebugSections = true;
+      continue;
+    }
     bool IsAlloc = (Sec->Flags & SHF_ALLOC);
     bool IsLinkOrder = (Sec->Flags & SHF_LINK_ORDER);
     bool IsRel = (Sec->Type == SHT_REL || Sec->Type == SHT_RELA);
     if (!IsAlloc && !IsLinkOrder && !IsRel)
-      Sec->Live = true;
+      setSectionLive<ELFT>(Sec);
   }
 
   // Follow the graph to mark all live sections.
   doGcSections<ELFT>();
 
+  if (AnyDebugSections)
+    // Mark debug sections as live in any object file that has a live
+    // Regular or Merge section.
+    for (InputSectionBase *Sec : InputSections)
+      if (Sec->Debug && Sec->getFile<ELFT>()->HasLiveCodeOrData)
+        setSectionLive<ELFT>(Sec);
+
   // Report garbage-collected sections.
   if (Config->PrintGcSections)
     for (InputSectionBase *Sec : InputSections)
Index: lld/ELF/InputSection.h
===================================================================
--- lld/ELF/InputSection.h
+++ lld/ELF/InputSection.h
@@ -52,15 +52,17 @@
 
   unsigned SectionKind : 3;
 
-  // The next two bit fields are only used by InputSectionBase, but we
+  // The next three bit fields are only used by InputSectionBase, but we
   // put them here so the struct packs better.
 
   // The garbage collector sets sections' Live bits.
   // If GC is disabled, all sections are considered live by default.
   unsigned Live : 1;
 
   unsigned Bss : 1;
 
+  unsigned Debug : 1;
+
   // Set for sections that should not be folded by ICF.
   unsigned KeepUnique : 1;
 
@@ -88,8 +90,8 @@
               uint64_t Entsize, uint64_t Alignment, uint32_t Type,
               uint32_t Info, uint32_t Link)
       : Name(Name), Repl(this), SectionKind(SectionKind), Live(false),
-        Bss(false), KeepUnique(false), Alignment(Alignment), Flags(Flags),
-        Entsize(Entsize), Type(Type), Link(Link), Info(Info) {}
+        Bss(false), Debug(false), KeepUnique(false), Alignment(Alignment),
+        Flags(Flags), Entsize(Entsize), Type(Type), Link(Link), Info(Info) {}
 };
 
 // This corresponds to a section of an input file.
Index: lld/ELF/InputSection.cpp
===================================================================
--- lld/ELF/InputSection.cpp
+++ lld/ELF/InputSection.cpp
@@ -73,6 +73,7 @@
 
   NumRelocations = 0;
   AreRelocsRela = false;
+  Debug = Name.startswith(".debug") || Name.startswith(".zdebug");
 
   // The ELF spec states that a value of 0 means the section has
   // no alignment constraits.
Index: lld/ELF/InputFiles.h
===================================================================
--- lld/ELF/InputFiles.h
+++ lld/ELF/InputFiles.h
@@ -215,6 +215,10 @@
   // but had one or more functions with the no_split_stack attribute.
   bool SomeNoSplitStack = false;
 
+  // True if the file has any live Regular or Merge sections that aren't
+  // the LSDA section.
+  bool HasLiveCodeOrData = false;
+
   // Pointer to this input file's .llvm_addrsig section, if it has one.
   const Elf_Shdr *AddrsigSec = nullptr;
 
Index: lld/ELF/Driver.cpp
===================================================================
--- lld/ELF/Driver.cpp
+++ lld/ELF/Driver.cpp
@@ -1491,9 +1491,7 @@
   // We do not want to emit debug sections if --strip-all
   // or -strip-debug are given.
   if (Config->Strip != StripPolicy::None)
-    llvm::erase_if(InputSections, [](InputSectionBase *S) {
-      return S->Name.startswith(".debug") || S->Name.startswith(".zdebug");
-    });
+    llvm::erase_if(InputSections, [](InputSectionBase *S) { return S->Debug; });
 
   Config->EFlags = Target->calcEFlags();
 
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to