https://github.com/GeorgeHuyubo created 
https://github.com/llvm/llvm-project/pull/92187

As we have debuginfod as symbol locator available in lldb now, we want to make 
full use of it.
In case of post mortem debugging, we don't always have the main executable 
available.
However, the .note.gnu.build-id of the main executable(some other modules too), 
should be available in the core file, as those binaries are loaded in memory 
and dumped in the core file.

We try to iterate through the NT_FILE entries, read and store the gnu build id 
if possible. This will be very useful as this id is the unique key which is 
needed for querying the debuginfod server.

Test:
Build and run lldb. Breakpoint set to 
https://github.com/llvm/llvm-project/blob/main/lldb/source/Plugins/SymbolLocator/Debuginfod/SymbolLocatorDebuginfod.cpp#L147
Verified after this commit, module_uuid is the correct gnu build id of the main 
executable which caused the crash(first in the NT_FILE entry)

Merged old PR by mistake. https://github.com/llvm/llvm-project/pull/92078
Reverted and re-open.

>From 740683faf4a711f8d7c169cb73492657ffb47b39 Mon Sep 17 00:00:00 2001
From: George Hu <huyubo...@gmail.com>
Date: Mon, 13 May 2024 17:03:30 -0700
Subject: [PATCH] Read and store gnu build id from loaded core file

---
 lldb/include/lldb/Target/Process.h            | 50 +++++++++++++++
 lldb/source/Commands/CommandObjectMemory.cpp  | 61 +------------------
 .../Process/elf-core/ProcessElfCore.cpp       | 50 +++++++++++++++
 .../Plugins/Process/elf-core/ProcessElfCore.h | 11 ++++
 lldb/source/Target/Process.cpp                | 27 ++++++++
 5 files changed, 140 insertions(+), 59 deletions(-)

diff --git a/lldb/include/lldb/Target/Process.h 
b/lldb/include/lldb/Target/Process.h
index aac0cf51680a9..c8a49edc5c78d 100644
--- a/lldb/include/lldb/Target/Process.h
+++ b/lldb/include/lldb/Target/Process.h
@@ -406,6 +406,36 @@ class Process : public 
std::enable_shared_from_this<Process>,
                                   lldb::StateType state);
   } Notifications;
 
+  class ProcessMemoryIterator {
+  public:
+    ProcessMemoryIterator(lldb::ProcessSP process_sp, lldb::addr_t base)
+        : m_process_sp(process_sp), m_base_addr(base) {
+      lldbassert(process_sp.get() != nullptr);
+    }
+
+    bool IsValid() { return m_is_valid; }
+
+    uint8_t operator[](lldb::addr_t offset) {
+      if (!IsValid())
+        return 0;
+
+      uint8_t retval = 0;
+      Status error;
+      if (0 ==
+          m_process_sp->ReadMemory(m_base_addr + offset, &retval, 1, error)) {
+        m_is_valid = false;
+        return 0;
+      }
+
+      return retval;
+    }
+
+  private:
+    lldb::ProcessSP m_process_sp;
+    lldb::addr_t m_base_addr;
+    bool m_is_valid = true;
+  };
+
   class ProcessEventData : public EventData {
     friend class Process;
 
@@ -1649,6 +1679,26 @@ class Process : public 
std::enable_shared_from_this<Process>,
 
   lldb::addr_t ReadPointerFromMemory(lldb::addr_t vm_addr, Status &error);
 
+  /// Find a string within a memory region.
+  ///
+  /// This function searches for the string represented by the provided buffer
+  /// within the memory range specified by the low and high addresses. It uses
+  /// a bad character heuristic to optimize the search process.
+  ///
+  /// \param[in] low The starting address of the memory region to be searched.
+  ///
+  /// \param[in] high The ending address of the memory region to be searched.
+  ///
+  /// \param[in] buffer A pointer to the buffer containing the string to be
+  /// searched.
+  ///
+  /// \param[in] buffer_size The size of the buffer in bytes.
+  ///
+  /// \return The address where the string was found or LLDB_INVALID_ADDRESS if
+  /// not found.
+  lldb::addr_t FindInMemory(lldb::addr_t low, lldb::addr_t high,
+                            uint8_t *buffer, size_t buffer_size);
+
   bool WritePointerToMemory(lldb::addr_t vm_addr, lldb::addr_t ptr_value,
                             Status &error);
 
diff --git a/lldb/source/Commands/CommandObjectMemory.cpp 
b/lldb/source/Commands/CommandObjectMemory.cpp
index b78a0492cca55..1c13484dede64 100644
--- a/lldb/source/Commands/CommandObjectMemory.cpp
+++ b/lldb/source/Commands/CommandObjectMemory.cpp
@@ -977,35 +977,6 @@ class CommandObjectMemoryFind : public CommandObjectParsed 
{
   Options *GetOptions() override { return &m_option_group; }
 
 protected:
-  class ProcessMemoryIterator {
-  public:
-    ProcessMemoryIterator(ProcessSP process_sp, lldb::addr_t base)
-        : m_process_sp(process_sp), m_base_addr(base) {
-      lldbassert(process_sp.get() != nullptr);
-    }
-
-    bool IsValid() { return m_is_valid; }
-
-    uint8_t operator[](lldb::addr_t offset) {
-      if (!IsValid())
-        return 0;
-
-      uint8_t retval = 0;
-      Status error;
-      if (0 ==
-          m_process_sp->ReadMemory(m_base_addr + offset, &retval, 1, error)) {
-        m_is_valid = false;
-        return 0;
-      }
-
-      return retval;
-    }
-
-  private:
-    ProcessSP m_process_sp;
-    lldb::addr_t m_base_addr;
-    bool m_is_valid = true;
-  };
   void DoExecute(Args &command, CommandReturnObject &result) override {
     // No need to check "process" for validity as eCommandRequiresProcess
     // ensures it is valid
@@ -1106,8 +1077,8 @@ class CommandObjectMemoryFind : public 
CommandObjectParsed {
     found_location = low_addr;
     bool ever_found = false;
     while (count) {
-      found_location = FastSearch(found_location, high_addr, buffer.GetBytes(),
-                                  buffer.GetByteSize());
+      found_location = process->FindInMemory(
+          found_location, high_addr, buffer.GetBytes(), buffer.GetByteSize());
       if (found_location == LLDB_INVALID_ADDRESS) {
         if (!ever_found) {
           result.AppendMessage("data not found within the range.\n");
@@ -1144,34 +1115,6 @@ class CommandObjectMemoryFind : public 
CommandObjectParsed {
     result.SetStatus(lldb::eReturnStatusSuccessFinishResult);
   }
 
-  lldb::addr_t FastSearch(lldb::addr_t low, lldb::addr_t high, uint8_t *buffer,
-                          size_t buffer_size) {
-    const size_t region_size = high - low;
-
-    if (region_size < buffer_size)
-      return LLDB_INVALID_ADDRESS;
-
-    std::vector<size_t> bad_char_heuristic(256, buffer_size);
-    ProcessSP process_sp = m_exe_ctx.GetProcessSP();
-    ProcessMemoryIterator iterator(process_sp, low);
-
-    for (size_t idx = 0; idx < buffer_size - 1; idx++) {
-      decltype(bad_char_heuristic)::size_type bcu_idx = buffer[idx];
-      bad_char_heuristic[bcu_idx] = buffer_size - idx - 1;
-    }
-    for (size_t s = 0; s <= (region_size - buffer_size);) {
-      int64_t j = buffer_size - 1;
-      while (j >= 0 && buffer[j] == iterator[s + j])
-        j--;
-      if (j < 0)
-        return low + s;
-      else
-        s += bad_char_heuristic[iterator[s + buffer_size - 1]];
-    }
-
-    return LLDB_INVALID_ADDRESS;
-  }
-
   OptionGroupOptions m_option_group;
   OptionGroupFindMemory m_memory_options;
   OptionGroupMemoryTag m_memory_tag_options;
diff --git a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp 
b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp
index 36812c27a5b6d..4ff03eb8ab485 100644
--- a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp
+++ b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp
@@ -6,10 +6,12 @@
 //
 
//===----------------------------------------------------------------------===//
 
+#include <cstddef>
 #include <cstdlib>
 
 #include <memory>
 #include <mutex>
+#include <tuple>
 
 #include "lldb/Core/Module.h"
 #include "lldb/Core/ModuleSpec.h"
@@ -210,6 +212,9 @@ Status ProcessElfCore::DoLoadCore() {
     }
   }
 
+  // We need to update uuid after address range is populated.
+  UpdateBuildIdForNTFileEntries();
+
   if (!ranges_are_sorted) {
     m_core_aranges.Sort();
     m_core_range_infos.Sort();
@@ -258,6 +263,7 @@ Status ProcessElfCore::DoLoadCore() {
     if (!m_nt_file_entries.empty()) {
       ModuleSpec exe_module_spec;
       exe_module_spec.GetArchitecture() = arch;
+      exe_module_spec.GetUUID() = m_nt_file_entries[0].uuid;
       exe_module_spec.GetFileSpec().SetFile(m_nt_file_entries[0].path,
                                             FileSpec::Style::native);
       if (exe_module_spec.GetFileSpec()) {
@@ -271,6 +277,16 @@ Status ProcessElfCore::DoLoadCore() {
   return error;
 }
 
+void ProcessElfCore::UpdateBuildIdForNTFileEntries() {
+  if (!m_nt_file_entries.empty()) {
+    for (NT_FILE_Entry &entry : m_nt_file_entries) {
+      std::optional<UUID> uuid = FindBuildId(entry);
+      if (uuid)
+        entry.uuid = uuid.value();
+    }
+  }
+}
+
 lldb_private::DynamicLoader *ProcessElfCore::GetDynamicLoader() {
   if (m_dyld_up.get() == nullptr)
     m_dyld_up.reset(DynamicLoader::FindPlugin(
@@ -983,6 +999,40 @@ llvm::Error 
ProcessElfCore::ParseThreadContextsFromNoteSegment(
   }
 }
 
+bool ProcessElfCore::IsElf(const NT_FILE_Entry entry) {
+  size_t size = strlen(llvm::ELF::ElfMagic);
+  uint8_t buf[size];
+  Status error;
+  size_t byte_read = ReadMemory(entry.start, buf, size, error);
+  if (byte_read == size)
+    return memcmp(llvm::ELF::ElfMagic, buf, size) == 0;
+  else
+    return false;
+}
+
+std::optional<UUID> ProcessElfCore::FindBuildId(const NT_FILE_Entry entry) {
+  if (!IsElf(entry))
+    return std::nullopt;
+  // Build ID is stored in the ELF file as a section named ".note.gnu.build-id"
+  uint8_t gnu_build_id_bytes[8] = {0x03, 0x00, 0x00, 0x00,
+                                   0x47, 0x4e, 0x55, 0x00};
+  lldb::addr_t gnu_build_id_addr =
+      FindInMemory(entry.start, entry.end, gnu_build_id_bytes, 8);
+  if (gnu_build_id_addr == LLDB_INVALID_ADDRESS)
+    return std::nullopt;
+  uint8_t buf[36];
+  Status error;
+  size_t byte_read = ReadMemory(gnu_build_id_addr - 8, buf, 36, error);
+  // .note.gnu.build-id starts with 04 00 00 00 {id_byte_size} 00 00 00 03 00 
00
+  // 00 47 4e 55 00
+  if (byte_read == 36) {
+    if (buf[0] == 0x04) {
+      return UUID(llvm::ArrayRef<uint8_t>(buf + 16, buf[4] /*byte size*/));
+    }
+  }
+  return std::nullopt;
+}
+
 uint32_t ProcessElfCore::GetNumThreadContexts() {
   if (!m_thread_data_valid)
     DoLoadCore();
diff --git a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h 
b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h
index 2cec635bbacfe..ae827f3df002c 100644
--- a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h
+++ b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h
@@ -117,6 +117,8 @@ class ProcessElfCore : public 
lldb_private::PostMortemProcess {
     lldb::addr_t end;
     lldb::addr_t file_ofs;
     std::string path;
+    lldb_private::UUID
+        uuid; // extracted from .note.gnu.build-id section from core file
   };
 
   // For ProcessElfCore only
@@ -158,6 +160,15 @@ class ProcessElfCore : public 
lldb_private::PostMortemProcess {
   // Returns number of thread contexts stored in the core file
   uint32_t GetNumThreadContexts();
 
+  // Populate gnu uuid for each NT_FILE entry
+  void UpdateBuildIdForNTFileEntries();
+
+  // Returns the UUID of a given NT_FILE entry
+  std::optional<lldb_private::UUID> FindBuildId(const NT_FILE_Entry entry);
+
+  // Returns true if the given NT_FILE entry is an ELF file
+  bool IsElf(const NT_FILE_Entry entry);
+
   // Parse a contiguous address range of the process from LOAD segment
   lldb::addr_t
   AddAddressRangeFromLoadSegment(const elf::ELFProgramHeader &header);
diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp
index 25afade9a8275..6f5c43bc41082 100644
--- a/lldb/source/Target/Process.cpp
+++ b/lldb/source/Target/Process.cpp
@@ -3191,6 +3191,33 @@ Status Process::Halt(bool clear_thread_plans, bool 
use_run_lock) {
   return Status();
 }
 
+lldb::addr_t Process::FindInMemory(lldb::addr_t low, lldb::addr_t high,
+                                   uint8_t *buffer, size_t buffer_size) {
+  const size_t region_size = high - low;
+
+  if (region_size < buffer_size)
+    return LLDB_INVALID_ADDRESS;
+
+  std::vector<size_t> bad_char_heuristic(256, buffer_size);
+  ProcessMemoryIterator iterator(shared_from_this(), low);
+
+  for (size_t idx = 0; idx < buffer_size - 1; idx++) {
+    decltype(bad_char_heuristic)::size_type bcu_idx = buffer[idx];
+    bad_char_heuristic[bcu_idx] = buffer_size - idx - 1;
+  }
+  for (size_t s = 0; s <= (region_size - buffer_size);) {
+    int64_t j = buffer_size - 1;
+    while (j >= 0 && buffer[j] == iterator[s + j])
+      j--;
+    if (j < 0)
+      return low + s;
+    else
+      s += bad_char_heuristic[iterator[s + buffer_size - 1]];
+  }
+
+  return LLDB_INVALID_ADDRESS;
+}
+
 Status Process::StopForDestroyOrDetach(lldb::EventSP &exit_event_sp) {
   Status error;
 

_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to