Author: Felipe de Azevedo Piovezan
Date: 2025-12-22T09:59:14-03:00
New Revision: e47b10917f8d7ca8edc8b096d502a0c2b3a901dc

URL: 
https://github.com/llvm/llvm-project/commit/e47b10917f8d7ca8edc8b096d502a0c2b3a901dc
DIFF: 
https://github.com/llvm/llvm-project/commit/e47b10917f8d7ca8edc8b096d502a0c2b3a901dc.diff

LOG: [lldb] Add ReadCStrings API to Process (#172026)

This commit uses Process::ReadMemoryRanges to create an efficient method
for reading multiple strings at once. This method works like the
single-string version, reading 256 bytes at a time, but it does so for
_every_ requested string at the same time.

Added: 
    

Modified: 
    lldb/include/lldb/Target/Process.h
    lldb/source/Target/Process.cpp
    lldb/unittests/Target/MemoryTest.cpp

Removed: 
    


################################################################################
diff  --git a/lldb/include/lldb/Target/Process.h b/lldb/include/lldb/Target/Process.h
index 4dd8559addbd5..8614347d1f34a 100644
--- a/lldb/include/lldb/Target/Process.h
+++ b/lldb/include/lldb/Target/Process.h
@@ -1680,6 +1680,9 @@ class Process : public std::enable_shared_from_this<Process>,
   size_t ReadCStringFromMemory(lldb::addr_t vm_addr, std::string &out_str,
                                Status &error);
 
+  llvm::SmallVector<std::optional<std::string>>
+  ReadCStringsFromMemory(llvm::ArrayRef<lldb::addr_t> addresses);
+
   /// Reads an unsigned integer of the specified byte size from process
   /// memory.
   ///

diff  --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp
index 9c8e8fa7041ee..ab250941b183b 100644
--- a/lldb/source/Target/Process.cpp
+++ b/lldb/source/Target/Process.cpp
@@ -117,6 +117,8 @@ static constexpr OptionEnumValueElement g_follow_fork_mode_values[] = {
     },
 };
 
+static constexpr unsigned g_string_read_width = 256;
+
 #define LLDB_PROPERTIES_process
 #include "TargetProperties.inc"
 
@@ -2135,9 +2137,63 @@ lldb::addr_t Process::FindInMemory(const uint8_t *buf, uint64_t size,
   return matches[0].GetBaseAddress().GetLoadAddress(&target);
 }
 
+/// Reads one NUL-terminated string per entry of \p addresses, issuing the
+/// reads for all unfinished strings together through ReadMemoryRanges.
+/// \return One element per input address, in the same order: the bytes found
+/// before the first NUL, or std::nullopt if a read for that string came back
+/// empty.
+llvm::SmallVector<std::optional<std::string>>
+Process::ReadCStringsFromMemory(llvm::ArrayRef<lldb::addr_t> addresses) {
+  const size_t num_strings = addresses.size();
+  llvm::SmallVector<std::optional<std::string>> results(num_strings, "");
+
+  // One read window per string. A window of size zero marks a finished string.
+  llvm::SmallVector<Range<addr_t, size_t>> windows;
+  windows.reserve(num_strings);
+  for (addr_t address : addresses)
+    windows.emplace_back(address, g_string_read_width);
+
+  std::vector<uint8_t> scratch(g_string_read_width * num_strings, 0);
+
+  // Each pass over an unfinished window either finishes its string or
+  // advances the window's base, so this loop always makes progress.
+  size_t num_unfinished = num_strings;
+  while (num_unfinished != 0) {
+    llvm::SmallVector<llvm::MutableArrayRef<uint8_t>> chunks =
+        ReadMemoryRanges(windows, scratch);
+    for (auto [window, chunk, result] : llvm::zip(windows, chunks, results)) {
+      if (window.GetByteSize() == 0) // Finished on an earlier pass.
+        continue;
+
+      // Nothing could be read: report the whole string as unavailable.
+      if (chunk.empty()) {
+        result = std::nullopt;
+        window.SetByteSize(0);
+        --num_unfinished;
+        continue;
+      }
+
+      // View the bytes as characters and keep everything before the NUL.
+      const llvm::StringRef chunk_str = llvm::toStringRef(chunk);
+      const size_t nul_idx = chunk_str.find('\0');
+      if (nul_idx == llvm::StringRef::npos) {
+        // No terminator yet: keep the chunk and continue right after it.
+        result->append(chunk_str.data(), chunk_str.size());
+        window.SetRangeBase(window.GetRangeBase() + chunk.size());
+        continue;
+      }
+      result->append(chunk_str.data(), nul_idx);
+      window.SetByteSize(0);
+      --num_unfinished;
+    }
+  }
+
+  return results;
+}
+
 size_t Process::ReadCStringFromMemory(addr_t addr, std::string &out_str,
                                       Status &error) {
-  char buf[256];
+  char buf[g_string_read_width];
   out_str.clear();
   addr_t curr_addr = addr;
   while (true) {

diff  --git a/lldb/unittests/Target/MemoryTest.cpp b/lldb/unittests/Target/MemoryTest.cpp
index 131a3cabdd896..15b22a47e30e8 100644
--- a/lldb/unittests/Target/MemoryTest.cpp
+++ b/lldb/unittests/Target/MemoryTest.cpp
@@ -434,3 +434,76 @@ TEST_F(MemoryDeathTest, TestReadMemoryRangesWithShortBuffer) {
     ASSERT_TRUE(result.empty());
 #endif
 }
+
+/// A process class whose memory contains the following map of addresses to
+/// strings:
+///   100 -> "hello\0"
+///   200 -> "\0"
+///   201 -> "goodbye\0"
+///   300 -> a string composed of 500 'c' characters, followed by '\0'.
+///   addresses >= 1024 -> error
+/// Every other byte is '?'. A read that starts below 1024 but asks for bytes
+/// past it is cut short at 1024 instead of running off the end of `memory`.
+class StringReaderProcess : public Process {
+public:
+  char memory[1024];
+
+  /// Fills `memory` with the layout described above.
+  void initialize_memory() {
+    // Use some easily identifiable character for the areas of memory we're not
+    // intending to read.
+    memset(memory, '?', sizeof(memory));
+    strcpy(&memory[100], "hello");
+    strcpy(&memory[200], "");
+    strcpy(&memory[201], "goodbye");
+    const std::string long_str(500, 'c');
+    strcpy(&memory[300], long_str.c_str());
+  }
+
+  /// Copies at most \p size bytes of `memory`, starting at \p vm_addr, into
+  /// \p buf and returns how many bytes were copied. Sets \p error and returns
+  /// 0 when \p vm_addr is not inside `memory`.
+  size_t DoReadMemory(lldb::addr_t vm_addr, void *buf, size_t size,
+                      Status &error) override {
+    if (vm_addr >= sizeof(memory)) {
+      error = Status::FromErrorString("out of bounds!");
+      return 0;
+    }
+    // Clamp the request so the copy never reads past the end of `memory`.
+    const size_t available = sizeof(memory) - static_cast<size_t>(vm_addr);
+    const size_t num_bytes = size < available ? size : available;
+    memcpy(buf, memory + vm_addr, num_bytes);
+    return num_bytes;
+  }
+
+  StringReaderProcess(lldb::TargetSP target_sp, lldb::ListenerSP listener_sp)
+      : Process(target_sp, listener_sp) {
+    initialize_memory();
+  }
+  // Boilerplate, nothing interesting below.
+  bool CanDebug(lldb::TargetSP, bool) override { return true; }
+  Status DoDestroy() override { return {}; }
+  void RefreshStateAfterStop() override {}
+  bool DoUpdateThreadList(ThreadList &, ThreadList &) override { return false; }
+  llvm::StringRef GetPluginName() override { return "Dummy"; }
+};
+
+/// Reads four valid strings and one unreadable address in a single call and
+/// checks every returned element, including the failed read.
+TEST_F(MemoryTest, TestReadCStringsFromMemory) {
+  ArchSpec arch("x86_64-apple-macosx-");
+  Platform::SetHostPlatform(PlatformRemoteMacOSX::CreateInstance(true, &arch));
+  DebuggerSP debugger_sp = Debugger::CreateInstance();
+  ASSERT_TRUE(debugger_sp);
+  TargetSP target_sp = CreateTarget(debugger_sp, arch);
+  ASSERT_TRUE(target_sp);
+  ListenerSP listener_sp(Listener::MakeListener("dummy"));
+  ProcessSP process_sp =
+      std::make_shared<StringReaderProcess>(target_sp, listener_sp);
+  ASSERT_TRUE(process_sp);
+
+  // See the docs for StringReaderProcess above for an explanation of these
+  // addresses. The last one lies outside the process's readable memory.
+  llvm::SmallVector<std::optional<std::string>> maybe_strings =
+      process_sp->ReadCStringsFromMemory({100, 200, 201, 300, 0xffffff});
+
+  const std::string big_str(500, 'c');
+  const std::vector<std::optional<std::string>> expected_answers = {
+      "hello", "", "goodbye", big_str, std::nullopt};
+
+  // llvm::zip stops at the shorter range, so require equal sizes first to
+  // guarantee that every result, including the std::nullopt for the failed
+  // read, is actually compared.
+  ASSERT_EQ(maybe_strings.size(), expected_answers.size());
+  for (auto [maybe_str, expected_answer] :
+       llvm::zip(maybe_strings, expected_answers))
+    EXPECT_EQ(maybe_str, expected_answer);
+}


        
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to