kwk updated this revision to Diff 222634.
kwk marked 10 inline comments as done.
kwk added a comment.
- Check that no additional symbols follow after the expected ones
- Use compiler-generated copy-ctor
- Cleanup from experiment
- Simplify NamedELFSymbol::hash()
- Cleanup
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D67390/new/
https://reviews.llvm.org/D67390
Files:
lldb/lit/Modules/ELF/load-from-dynsym-alone.c
lldb/lit/Modules/ELF/load-symtab-and-dynsym.c
lldb/lit/Modules/ELF/merge-symbols.yaml
lldb/lit/Modules/lit.local.cfg
lldb/lit/helper/toolchain.py
lldb/source/Plugins/ObjectFile/ELF/ELFHeader.cpp
lldb/source/Plugins/ObjectFile/ELF/ELFHeader.h
lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
Index: lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
===================================================================
--- lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
+++ lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
@@ -12,6 +12,7 @@
#include <stdint.h>
#include <vector>
+#include <unordered_set>
#include "lldb/Symbol/ObjectFile.h"
#include "lldb/Utility/ArchSpec.h"
@@ -184,6 +185,8 @@
typedef std::map<lldb::addr_t, lldb_private::AddressClass>
FileAddressToAddressClassMap;
+
+ typedef std::unordered_set<elf::NamedELFSymbol> UniqueElfSymbolColl;
/// Version of this reader common to all plugins based on this class.
static const uint32_t m_plugin_version = 1;
@@ -278,7 +281,8 @@
/// will parse the symbols only once. Returns the number of symbols parsed.
unsigned ParseSymbolTable(lldb_private::Symtab *symbol_table,
lldb::user_id_t start_id,
- lldb_private::Section *symtab);
+ lldb_private::Section *symtab,
+ UniqueElfSymbolColl &unique_elf_symbols);
/// Helper routine for ParseSymbolTable().
unsigned ParseSymbols(lldb_private::Symtab *symbol_table,
@@ -286,7 +290,8 @@
lldb_private::SectionList *section_list,
const size_t num_symbols,
const lldb_private::DataExtractor &symtab_data,
- const lldb_private::DataExtractor &strtab_data);
+ const lldb_private::DataExtractor &strtab_data,
+ UniqueElfSymbolColl &unique_elf_symbols);
/// Scans the relocation entries and adds a set of artificial symbols to the
/// given symbol table for each PLT slot. Returns the number of symbols
Index: lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
===================================================================
--- lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
+++ lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
@@ -1871,7 +1871,8 @@
SectionList *section_list,
const size_t num_symbols,
const DataExtractor &symtab_data,
- const DataExtractor &strtab_data) {
+ const DataExtractor &strtab_data,
+ UniqueElfSymbolColl &unique_elf_symbols) {
ELFSymbol symbol;
lldb::offset_t offset = 0;
@@ -2196,20 +2197,28 @@
symbol_size_valid, // Symbol size is valid
has_suffix, // Contains linker annotations?
flags); // Symbol flags.
- symtab->AddSymbol(dc_symbol);
+
+ NamedELFSymbol needle(symbol, ConstString(symbol_ref),
+ symbol_section_sp.get() ? symbol_section_sp->GetName()
+ : ConstString());
+ if (unique_elf_symbols.insert(needle).second) {
+ symtab->AddSymbol(dc_symbol);
+ }
}
return i;
}
-unsigned ObjectFileELF::ParseSymbolTable(Symtab *symbol_table,
- user_id_t start_id,
- lldb_private::Section *symtab) {
+unsigned
+ObjectFileELF::ParseSymbolTable(Symtab *symbol_table, user_id_t start_id,
+ lldb_private::Section *symtab,
+ UniqueElfSymbolColl &unique_elf_symbols) {
if (symtab->GetObjectFile() != this) {
// If the symbol table section is owned by a different object file, have it
// do the parsing.
ObjectFileELF *obj_file_elf =
static_cast<ObjectFileELF *>(symtab->GetObjectFile());
- return obj_file_elf->ParseSymbolTable(symbol_table, start_id, symtab);
+ return obj_file_elf->ParseSymbolTable(symbol_table, start_id, symtab,
+ unique_elf_symbols);
}
// Get section list for this object file.
@@ -2237,7 +2246,7 @@
size_t num_symbols = symtab_data.GetByteSize() / symtab_hdr->sh_entsize;
return ParseSymbols(symbol_table, start_id, section_list, num_symbols,
- symtab_data, strtab_data);
+ symtab_data, strtab_data, unique_elf_symbols);
}
}
@@ -2405,7 +2414,7 @@
true, // Size is valid
false, // Contains linker annotations?
0); // Symbol flags.
-
+
symbol_table->AddSymbol(jump_symbol);
}
@@ -2644,22 +2653,35 @@
// Sharable objects and dynamic executables usually have 2 distinct symbol
// tables, one named ".symtab", and the other ".dynsym". The dynsym is a
- // smaller version of the symtab that only contains global symbols. The
- // information found in the dynsym is therefore also found in the symtab,
- // while the reverse is not necessarily true.
+ // smaller version of the symtab that only contains global symbols.
+ // Information in the dynsym section is *usually* also found in the symtab,
+ // but this is not required as symtab entries can be removed after linking.
+ // The minidebuginfo format makes use of this facility to create smaller
+ // symbol tables.
Section *symtab =
section_list->FindSectionByType(eSectionTypeELFSymbolTable, true).get();
- if (!symtab) {
- // The symtab section is non-allocable and can be stripped, so if it
- // doesn't exist then use the dynsym section which should always be
- // there.
- symtab =
- section_list->FindSectionByType(eSectionTypeELFDynamicSymbols, true)
- .get();
- }
+
+ // A unique set of ELF symbols added to the symtab
+ UniqueElfSymbolColl unique_elf_symbols;
+
if (symtab) {
m_symtab_up.reset(new Symtab(symtab->GetObjectFile()));
- symbol_id += ParseSymbolTable(m_symtab_up.get(), symbol_id, symtab);
+ symbol_id += ParseSymbolTable(m_symtab_up.get(), symbol_id, symtab,
+ unique_elf_symbols);
+ }
+
+ // The symtab section is non-allocable and can be stripped, while the dynsym
+ // section which should always be always be there. If both exist we load
+ // both to support the minidebuginfo case. Otherwise we just load the dynsym
+ // section.
+ Section *dynsym =
+ section_list->FindSectionByType(eSectionTypeELFDynamicSymbols, true)
+ .get();
+ if (dynsym) {
+ if (!m_symtab_up)
+ m_symtab_up.reset(new Symtab(dynsym->GetObjectFile()));
+ symbol_id += ParseSymbolTable(m_symtab_up.get(), symbol_id, dynsym,
+ unique_elf_symbols);
}
// DT_JMPREL
Index: lldb/source/Plugins/ObjectFile/ELF/ELFHeader.h
===================================================================
--- lldb/source/Plugins/ObjectFile/ELF/ELFHeader.h
+++ lldb/source/Plugins/ObjectFile/ELF/ELFHeader.h
@@ -24,6 +24,7 @@
#include "lldb/lldb-enumerations.h"
#include "lldb/lldb-types.h"
+#include "lldb/Utility/ConstString.h"
namespace lldb_private {
class DataExtractor;
@@ -271,6 +272,32 @@
const lldb_private::SectionList *section_list);
};
+/// A \c NamedELFSymbol is an ELFSymbol that, alongside it's name and section
+/// index, stores the name of the symbol and section as a string and only uses
+/// those for comparison instead of the name or section index which could differ
+/// depending on which section the symbol is defined in (e.g. .strtab or
+/// .dynstr) or which object file a section belongs to (see .gnu_debugdata).
+struct NamedELFSymbol : ELFSymbol {
+ lldb_private::ConstString st_name_string; ///< Actual name of the ELF symbol
+ lldb_private::ConstString
+ st_section_name_string; ///< Actual name of the section
+
+ NamedELFSymbol(const ELFSymbol &sym, lldb_private::ConstString symbol_name,
+ lldb_private::ConstString section_name);
+
+ /// \returns \c true when all fields (except name and section indexes) of the
+ /// right hand side object are the same as the once of this object; otherwise
+ /// \c false is returned. We ignore the name and section index in order to
+ /// only compare actual name strings and not where strings are located.
+ bool operator==(const NamedELFSymbol &rhs) const noexcept;
+
+ /// \c returns a combined hash value for the given \c NamedELFSymbol over all
+ /// struct fields but ignores the name and section index of the base struct in
+ /// order to only compare actual name strings and not where strings are
+ /// located.
+ std::size_t hash() const noexcept;
+};
+
/// \class ELFDynamic
/// Represents an entry in an ELF dynamic table.
struct ELFDynamic {
@@ -391,4 +418,14 @@
} // End namespace elf.
+namespace std {
+/// Define specializations of the std::hash struct for NamedELFSymbol so they
+/// can be used in an std::unordered_set.
+template <> struct hash<elf::NamedELFSymbol> {
+ std::size_t operator()(const elf::NamedELFSymbol &s) const noexcept {
+ return s.hash();
+ }
+};
+} // namespace std
+
#endif // #ifndef liblldb_ELFHeader_h_
Index: lldb/source/Plugins/ObjectFile/ELF/ELFHeader.cpp
===================================================================
--- lldb/source/Plugins/ObjectFile/ELF/ELFHeader.cpp
+++ lldb/source/Plugins/ObjectFile/ELF/ELFHeader.cpp
@@ -11,6 +11,7 @@
#include "lldb/Core/Section.h"
#include "lldb/Utility/DataExtractor.h"
#include "lldb/Utility/Stream.h"
+#include "llvm/ADT/Hashing.h"
#include "ELFHeader.h"
@@ -353,6 +354,28 @@
return true;
}
+// NamedELFSymbol
+
+NamedELFSymbol::NamedELFSymbol(const ELFSymbol &other,
+ lldb_private::ConstString symbol_name,
+ lldb_private::ConstString section_name)
+ : ELFSymbol(other), st_name_string(symbol_name),
+ st_section_name_string(section_name) {}
+
+bool NamedELFSymbol::operator==(const NamedELFSymbol &rhs) const noexcept {
+ return st_value == rhs.st_value && st_size == rhs.st_size &&
+ st_info == rhs.st_info && st_other == rhs.st_other &&
+ st_name_string == rhs.st_name_string &&
+ st_section_name_string == rhs.st_section_name_string;
+}
+
+std::size_t NamedELFSymbol::hash() const noexcept {
+ // ignore the name and section index when hashing the ELFSymbol
+ return llvm::hash_combine(st_value, st_size, st_info, st_other,
+ st_name_string.AsCString(),
+ st_section_name_string.AsCString());
+}
+
// ELFProgramHeader
ELFProgramHeader::ELFProgramHeader() {
Index: lldb/lit/helper/toolchain.py
===================================================================
--- lldb/lit/helper/toolchain.py
+++ lldb/lit/helper/toolchain.py
@@ -126,6 +126,6 @@
support_tools = ['yaml2obj', 'obj2yaml', 'llvm-pdbutil',
'llvm-mc', 'llvm-readobj', 'llvm-objdump',
- 'llvm-objcopy', 'lli']
+ 'llvm-objcopy', 'lli', 'llvm-strip']
additional_tool_dirs += [config.lldb_tools_dir, config.llvm_tools_dir]
llvm_config.add_tool_substitutions(support_tools, additional_tool_dirs)
Index: lldb/lit/Modules/lit.local.cfg
===================================================================
--- lldb/lit/Modules/lit.local.cfg
+++ lldb/lit/Modules/lit.local.cfg
@@ -1 +1 @@
-config.suffixes = ['.s', '.test', '.yaml']
+config.suffixes = ['.s', '.test', '.yaml', '.c']
Index: lldb/lit/Modules/ELF/merge-symbols.yaml
===================================================================
--- /dev/null
+++ lldb/lit/Modules/ELF/merge-symbols.yaml
@@ -0,0 +1,98 @@
+# This test ensures that two symbols with the same name are only merged if all
+# of the following attributes are the same: symbol name, size, address/value,
+# section name.
+
+# RUN: yaml2obj %s > %t
+
+# RUN: llvm-objcopy \
+# RUN: --redefine-sym=all_same2=all_same1 \
+# RUN: --redefine-sym=different_section2=different_section1 \
+# RUN: --redefine-sym=different_address2=different_address1 \
+# RUN: --redefine-sym=different_size2=different_size1 %t
+
+# RUN: %lldb -b -o 'image dump symtab' %t | FileCheck %s
+
+# CHECK: Index UserID DSX Type File Address/Value Load Address Size Flags Name
+# CHECK: [ 0] 1 Code 0x0000000000500000 0x0000000000000008 0x00000002 all_same1
+# CHECK-NEXT: [ 1] 3 Code 0x0000000000500000 0x0000000000000008 0x00000002 different_section1
+# CHECK-NEXT: [ 2] 4 Code 0x0000000000500000 0x0000000000000008 0x00000002 different_section1
+# CHECK-NEXT: [ 3] 5 Code 0x0000000000500000 0x0000000000000008 0x00000002 different_address1
+# CHECK-NEXT: [ 4] 6 Code 0x0000000000500001 0x0000000000000008 0x00000002 different_address1
+# CHECK-NEXT: [ 5] 7 Code 0x0000000000500000 0x0000000000000008 0x00000002 different_size1
+# CHECK-NEXT: [ 6] 8 Code 0x0000000000500000 0x0000000000000009 0x00000002 different_size1
+# CHECK-EMPTY:
+
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_EXEC
+ Machine: EM_X86_64
+ Entry: 0x0000000000400000
+Sections:
+ - Name: .text
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ Address: 0x0000000000400000
+ AddressAlign: 0x0000000000000010
+ Content: DEADBEEFBAADF00D
+ - Name: .text2
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ Address: 0x0000000000500000
+ AddressAlign: 0x0000000000000010
+ Content: DEADBEEFBAADF00D
+Symbols:
+ # Symbols with same addresses, names, sizes but different sections
+ - Name: all_same1
+ Type: STT_FUNC
+ Section: .text
+ Value: 0x0000000000500000
+ Size: 0x0000000000000008
+ # Symbol all_same2 will be renamed to all_same1 and should therefore
+ # disappear from the dumped symtab because all fields are equal.
+ - Name: all_same2
+ Type: STT_FUNC
+ Section: .text
+ Value: 0x0000000000500000
+ Size: 0x0000000000000008
+ # Symbols with same addresses, names, sizes and sections
+ - Name: different_section1
+ Type: STT_FUNC
+ Section: .text
+ Value: 0x0000000000500000
+ Size: 0x0000000000000008
+ # Symbol different_section2 will be renamed to different_section1 and we
+ # should# see two symbols named different_section1 in the dumped symtab.
+ - Name: different_section2
+ Type: STT_FUNC
+ Section: .text2
+ Value: 0x0000000000500000
+ Size: 0x0000000000000008
+ # Symbols with same names, sizes and sections but different addresses
+ - Name: different_address1
+ Type: STT_FUNC
+ Section: .text
+ Value: 0x0000000000500000
+ Size: 0x0000000000000008
+ # Symbol different_address2 will be renamed to different_address1 and we
+ # should see two symbols named different_address1 in the dumped symtab.
+ - Name: different_address2
+ Type: STT_FUNC
+ Section: .text
+ Value: 0x0000000000500001
+ Size: 0x0000000000000008
+ # Symbols with same names, addresses and sections but different sizes
+ - Name: different_size1
+ Type: STT_FUNC
+ Section: .text
+ Value: 0x0000000000500000
+ Size: 0x0000000000000008
+ # Symbol different_size2 will be renamed to different_size1 and we should
+ # see two symbols named different_size1 in the dumped symtab.
+ - Name: different_size2
+ Type: STT_FUNC
+ Section: .text
+ Value: 0x0000000000500000
+ Size: 0x0000000000000009
+...
Index: lldb/lit/Modules/ELF/load-symtab-and-dynsym.c
===================================================================
--- /dev/null
+++ lldb/lit/Modules/ELF/load-symtab-and-dynsym.c
@@ -0,0 +1,59 @@
+// REQUIRES: system-linux
+
+// This test ensures that we will load .dynsym even if there's a .symtab section.
+// We do this by compiling a small C program with two functions and we direct the
+// linker where to put the symbols so that in the end the layout is as follows:
+//
+// Symbol table '.dynsym' contains 4 entries:
+// Num: Value Size Type Bind Vis Ndx Name
+// 0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND
+// 1: 0000000000000000 0 FUNC GLOBAL DEFAULT UND __libc_start_main@GLIBC_2.2.5
+// 2: 0000000000000000 0 NOTYPE WEAK DEFAULT UND __gmon_start__
+// 3: 0000000000401120 13 FUNC GLOBAL DEFAULT 10 functionInDynsym
+//
+// Symbol table '.symtab' contains 2 entries:
+// Num: Value Size Type Bind Vis Ndx Name
+// 0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND
+// 1: 0000000000401110 15 FUNC GLOBAL DEFAULT 10 functionInSymtab
+
+// We want to keep the symbol "functionInDynsym" in the .dynsym section and not
+// have it put the default .symtab section.
+// RUN: echo "{functionInDynsym;};" > %T/dynamic-symbols.txt
+// RUN: %clang -Wl,--dynamic-list=%T/dynamic-symbols.txt -g -o %t.binary %s
+
+// Remove not needed symbols
+// RUN: echo "functionInSymtab" > %t.keep_symbols
+// RUN: echo "functionInDynsym" >> %t.keep_symbols
+// RUN: llvm-objcopy --strip-all --remove-section .gdb_index --remove-section .comment --keep-symbols=%t.keep_symbols %t.binary
+
+// Remove functionInDynsym symbol from .symtab (will leave symbol in .dynsym intact)
+// RUN: llvm-strip --strip-symbol=functionInDynsym %t.binary
+
+// RUN: %lldb -b -o 'b functionInSymtab' -o 'b functionInDynsym' -o 'run' -o 'continue' %t.binary | FileCheck %s
+
+// CHECK: (lldb) b functionInSymtab
+// CHECK-NEXT: Breakpoint 1: where = {{.*}}.binary`functionInSymtab, address = 0x{{.*}}
+
+// CHECK: (lldb) b functionInDynsym
+// CHECK-NEXT: Breakpoint 2: where = {{.*}}.binary`functionInDynsym, address = 0x{{.*}}
+
+// CHECK: (lldb) run
+// CHECK-NEXT: Process {{.*}} stopped
+// CHECK-NEXT: * thread #1, name = 'load-symtab-and', stop reason = breakpoint 1.1
+
+// CHECK: (lldb) continue
+// CHECK-NEXT: Process {{.*}} resuming
+// CHECK-NEXT: Process {{.*}} stopped
+// CHECK-NEXT: * thread #1, name = 'load-symtab-and', stop reason = breakpoint 2.1
+
+// This function will be embedded within the .symtab section of the main binary.
+int functionInSymtab(int num) { return num * 4; }
+
+// This function will be embedded within the .dynsym section of the main binary.
+int functionInDynsym(int num) { return num * 3; }
+
+int main(int argc, char *argv[]) {
+ int x = functionInSymtab(argc);
+ int y = functionInDynsym(x);
+ return y;
+}
Index: lldb/lit/Modules/ELF/load-from-dynsym-alone.c
===================================================================
--- /dev/null
+++ lldb/lit/Modules/ELF/load-from-dynsym-alone.c
@@ -0,0 +1,41 @@
+// REQUIRES: system-linux
+
+// This test ensures that we will load .dynsym even if there's no .symtab section.
+// We do this by compiling a small C program with a function and we direct the
+// linker where to put the symbols so that in the end the layout is as follows:
+//
+// Symbol table '.dynsym' contains 4 entries:
+// Num: Value Size Type Bind Vis Ndx Name
+// 0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND
+// 1: 0000000000000000 0 FUNC GLOBAL DEFAULT UND __libc_start_main@GLIBC_2.2.5
+// 2: 0000000000000000 0 NOTYPE WEAK DEFAULT UND __gmon_start__
+// 3: 0000000000401110 13 FUNC GLOBAL DEFAULT 10 functionInDynsym
+
+// We want to keep the symbol "functionInDynsym" in the .dynsym section and not
+// have it put the default .symtab section.
+// RUN: echo "{functionInDynsym;};" > %T/dynamic-symbols.txt
+// RUN: %clang -Wl,--dynamic-list=%T/dynamic-symbols.txt -g -o %t.binary %s
+
+// Remove not needed symbols
+// RUN: echo "functionInDynsym" > %t.keep_symbols
+// RUN: llvm-objcopy --strip-all --remove-section .gdb_index --remove-section .comment --keep-symbols=%t.keep_symbols %t.binary
+
+// Remove functionInDynsym symbol from .symtab (will leave symbol in .dynsym intact)
+// RUN: llvm-strip --strip-symbol=functionInDynsym %t.binary
+
+// RUN: %lldb -b -o 'b functionInDynsym' -o 'run' -o 'continue' %t.binary | FileCheck %s
+
+// CHECK: (lldb) b functionInDynsym
+// CHECK-NEXT: Breakpoint 1: where = {{.*}}.binary`functionInDynsym, address = 0x{{.*}}
+
+// CHECK: (lldb) run
+// CHECK-NEXT: Process {{.*}} stopped
+// CHECK-NEXT: * thread #1, name = 'load-from-dynsy', stop reason = breakpoint 1.1
+
+// This function will be embedded within the .dynsym section of the main binary.
+int functionInDynsym(int num) { return num * 3; }
+
+int main(int argc, char *argv[]) {
+ int y = functionInDynsym(argc);
+ return y;
+}
_______________________________________________
lldb-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits