jasonmolenda updated this revision to Diff 442720.
jasonmolenda added a comment.

Updated patch to address Jonas and David's feedback.

Updated the test C file so the mach header sizeofcmds is correct.  Update test 
to test querying an invalid mach-o binary, and the combination of a valid & 
invalid mach-o binary addresses, and check that we get the correct replies.

Add simple checks to MachProcess::GetMachOInformationFromMemory() which parses 
inferior memory as a Mach-O binary, to detect things that are not mach-o 
binaries and return false in those case.

Update callers to GetMachOInformationFromMemory() to mark any entry that had a 
problem parsing it to mark that entry is is_valid=false.

Update MachProcess::FormatDynamicLibrariesIntoJSON() to check the is_valid flag 
for each entry, and don't add those entries to the StructuredData return array. 
 Do return validly formatted mach-o's that were found during the scan, though.

MachProcess::GetLoadedDynamicLibrariesInfos() (not used on modern Darwin 
systems any more) - in the process of updating its handling of macho-parsing 
call, noticed that the indentation was all incorrect for this method.  Fixed 
it, removed an extra return at the end.  This looks like it's undergone some 
incorrect revision over the years -- probably by me.  It's near time this could 
be removed altogether most likely, but I don't want to do that just yet.

I'll add an NDEBUG assert to DynamicLoaderMacOS later which checks that lldb 
gets back the same number of binary image entries that it asked for -- so we 
can flag any case where debugserver omits a binary in our testing.  I can't 
imagine this would happen.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D128956/new/

https://reviews.llvm.org/D128956

Files:
  lldb/test/API/macosx/unregistered-macho/Makefile
  lldb/test/API/macosx/unregistered-macho/TestUnregisteredMacho.py
  lldb/test/API/macosx/unregistered-macho/main.c
  lldb/tools/debugserver/source/MacOSX/MachProcess.h
  lldb/tools/debugserver/source/MacOSX/MachProcess.mm

Index: lldb/tools/debugserver/source/MacOSX/MachProcess.mm
===================================================================
--- lldb/tools/debugserver/source/MacOSX/MachProcess.mm
+++ lldb/tools/debugserver/source/MacOSX/MachProcess.mm
@@ -757,12 +757,25 @@
     uint32_t dyld_platform, nub_addr_t mach_o_header_addr, int wordsize,
     struct mach_o_information &inf) {
   uint64_t load_cmds_p;
+  auto is_valid_macho_header = [](uint32_t magic, uint32_t cputype) -> bool {
+    if (magic != MH_MAGIC && magic != MH_CIGAM && magic != MH_MAGIC_64 &&
+        magic != MH_CIGAM_64)
+      return false;
+    if (cputype != CPU_TYPE_X86_64 && cputype != CPU_TYPE_ARM &&
+        cputype != CPU_TYPE_ARM64 && cputype != CPU_TYPE_ARM64_32)
+      return false;
+    return true;
+  };
+
   if (wordsize == 4) {
     struct mach_header header;
     if (ReadMemory(mach_o_header_addr, sizeof(struct mach_header), &header) !=
         sizeof(struct mach_header)) {
       return false;
     }
+    if (!is_valid_macho_header(header.magic, header.cputype))
+      return false;
+
     load_cmds_p = mach_o_header_addr + sizeof(struct mach_header);
     inf.mach_header.magic = header.magic;
     inf.mach_header.cputype = header.cputype;
@@ -779,6 +792,8 @@
                    &header) != sizeof(struct mach_header_64)) {
       return false;
     }
+    if (!is_valid_macho_header(header.magic, header.cputype))
+      return false;
     load_cmds_p = mach_o_header_addr + sizeof(struct mach_header_64);
     inf.mach_header.magic = header.magic;
     inf.mach_header.cputype = header.cputype;
@@ -896,6 +911,8 @@
   const size_t image_count = image_infos.size();
 
   for (size_t i = 0; i < image_count; i++) {
+    if (!image_infos[i].is_valid)
+      continue;
     JSONGenerator::DictionarySP image_info_dict_sp(
         new JSONGenerator::Dictionary());
     image_info_dict_sp->AddIntegerItem("load_address",
@@ -970,7 +987,6 @@
   }
 
   JSONGenerator::DictionarySP reply_sp(new JSONGenerator::Dictionary());
-  ;
   reply_sp->AddItem("images", image_infos_array_sp);
 
   return reply_sp;
@@ -985,8 +1001,8 @@
 // information.
 JSONGenerator::ObjectSP MachProcess::GetLoadedDynamicLibrariesInfos(
     nub_process_t pid, nub_addr_t image_list_address, nub_addr_t image_count) {
-  JSONGenerator::DictionarySP reply_sp;
 
+  JSONGenerator::ObjectSP empty_reply_sp(new JSONGenerator::Dictionary());
   int pointer_size = GetInferiorAddrSize(pid);
 
   std::vector<struct binary_image_information> image_infos;
@@ -994,91 +1010,89 @@
 
   uint8_t *image_info_buf = (uint8_t *)malloc(image_infos_size);
   if (image_info_buf == NULL) {
-    return reply_sp;
+    return empty_reply_sp;
+  }
+  if (ReadMemory(image_list_address, image_infos_size, image_info_buf) !=
+      image_infos_size) {
+    return empty_reply_sp;
   }
-    if (ReadMemory(image_list_address, image_infos_size, image_info_buf) !=
-        image_infos_size) {
-      return reply_sp;
-    }
 
-    ////  First the image_infos array with (load addr, pathname, mod date)
-    ///tuples
-
-    for (size_t i = 0; i < image_count; i++) {
-      struct binary_image_information info;
-      nub_addr_t pathname_address;
-      if (pointer_size == 4) {
-        uint32_t load_address_32;
-        uint32_t pathname_address_32;
-        uint32_t mod_date_32;
-        ::memcpy(&load_address_32, image_info_buf + (i * 3 * pointer_size), 4);
-        ::memcpy(&pathname_address_32,
-                 image_info_buf + (i * 3 * pointer_size) + pointer_size, 4);
-        ::memcpy(&mod_date_32, image_info_buf + (i * 3 * pointer_size) +
-                                   pointer_size + pointer_size,
-                 4);
-        info.load_address = load_address_32;
-        info.mod_date = mod_date_32;
-        pathname_address = pathname_address_32;
-      } else {
-        uint64_t load_address_64;
-        uint64_t pathname_address_64;
-        uint64_t mod_date_64;
-        ::memcpy(&load_address_64, image_info_buf + (i * 3 * pointer_size), 8);
-        ::memcpy(&pathname_address_64,
-                 image_info_buf + (i * 3 * pointer_size) + pointer_size, 8);
-        ::memcpy(&mod_date_64, image_info_buf + (i * 3 * pointer_size) +
-                                   pointer_size + pointer_size,
-                 8);
-        info.load_address = load_address_64;
-        info.mod_date = mod_date_64;
-        pathname_address = pathname_address_64;
-      }
-      char strbuf[17];
-      info.filename = "";
-      uint64_t pathname_ptr = pathname_address;
-      bool still_reading = true;
-      while (still_reading &&
-             ReadMemory(pathname_ptr, sizeof(strbuf) - 1, strbuf) ==
-                 sizeof(strbuf) - 1) {
-        strbuf[sizeof(strbuf) - 1] = '\0';
-        info.filename += strbuf;
-        pathname_ptr += sizeof(strbuf) - 1;
-        // Stop if we found nul byte indicating the end of the string
-        for (size_t i = 0; i < sizeof(strbuf) - 1; i++) {
-          if (strbuf[i] == '\0') {
-            still_reading = false;
-            break;
-          }
+  ////  First the image_infos array with (load addr, pathname, mod date)
+  /// tuples
+
+  for (size_t i = 0; i < image_count; i++) {
+    struct binary_image_information info;
+    nub_addr_t pathname_address;
+    if (pointer_size == 4) {
+      uint32_t load_address_32;
+      uint32_t pathname_address_32;
+      uint32_t mod_date_32;
+      ::memcpy(&load_address_32, image_info_buf + (i * 3 * pointer_size), 4);
+      ::memcpy(&pathname_address_32,
+               image_info_buf + (i * 3 * pointer_size) + pointer_size, 4);
+      ::memcpy(&mod_date_32,
+               image_info_buf + (i * 3 * pointer_size) + pointer_size +
+                   pointer_size,
+               4);
+      info.load_address = load_address_32;
+      info.mod_date = mod_date_32;
+      pathname_address = pathname_address_32;
+    } else {
+      uint64_t load_address_64;
+      uint64_t pathname_address_64;
+      uint64_t mod_date_64;
+      ::memcpy(&load_address_64, image_info_buf + (i * 3 * pointer_size), 8);
+      ::memcpy(&pathname_address_64,
+               image_info_buf + (i * 3 * pointer_size) + pointer_size, 8);
+      ::memcpy(&mod_date_64,
+               image_info_buf + (i * 3 * pointer_size) + pointer_size +
+                   pointer_size,
+               8);
+      info.load_address = load_address_64;
+      info.mod_date = mod_date_64;
+      pathname_address = pathname_address_64;
+    }
+    char strbuf[17];
+    info.filename = "";
+    uint64_t pathname_ptr = pathname_address;
+    bool still_reading = true;
+    while (still_reading && ReadMemory(pathname_ptr, sizeof(strbuf) - 1,
+                                       strbuf) == sizeof(strbuf) - 1) {
+      strbuf[sizeof(strbuf) - 1] = '\0';
+      info.filename += strbuf;
+      pathname_ptr += sizeof(strbuf) - 1;
+      // Stop if we found nul byte indicating the end of the string
+      for (size_t i = 0; i < sizeof(strbuf) - 1; i++) {
+        if (strbuf[i] == '\0') {
+          still_reading = false;
+          break;
         }
       }
-      uuid_clear(info.macho_info.uuid);
-      image_infos.push_back(info);
-    }
-    if (image_infos.size() == 0) {
-      return reply_sp;
     }
+    uuid_clear(info.macho_info.uuid);
+    image_infos.push_back(info);
+  }
+  if (image_infos.size() == 0) {
+    return empty_reply_sp;
+  }
 
-    free(image_info_buf);
+  free(image_info_buf);
 
-    ////  Second, read the mach header / load commands for all the dylibs
+  ////  Second, read the mach header / load commands for all the dylibs
 
-    for (size_t i = 0; i < image_count; i++) {
-      // The SPI to provide platform is not available on older systems.
-      uint32_t platform = 0;
-      if (!GetMachOInformationFromMemory(platform,
-                                         image_infos[i].load_address,
-                                         pointer_size,
-                                         image_infos[i].macho_info)) {
-        return reply_sp;
-      }
+  for (size_t i = 0; i < image_count; i++) {
+    // The SPI to provide platform is not available on older systems.
+    uint32_t platform = 0;
+    if (GetMachOInformationFromMemory(platform, image_infos[i].load_address,
+                                      pointer_size,
+                                      image_infos[i].macho_info)) {
+      image_infos[i].is_valid = true;
     }
+  }
 
-    ////  Third, format all of the above in the JSONGenerator object.
-
-    return FormatDynamicLibrariesIntoJSON(image_infos);
+  ////  Third, format all of the above in the JSONGenerator object.
 
-  return reply_sp;
+  return FormatDynamicLibrariesIntoJSON(image_infos);
 }
 
 /// From dyld SPI header dyld_process_info.h
@@ -1141,7 +1155,6 @@
 // macOS 10.12, iOS 10, tvOS 10, watchOS 3 and newer.
 JSONGenerator::ObjectSP
 MachProcess::GetAllLoadedLibrariesInfos(nub_process_t pid) {
-  JSONGenerator::DictionarySP reply_sp;
 
   int pointer_size = GetInferiorAddrSize(pid);
   std::vector<struct binary_image_information> image_infos;
@@ -1149,8 +1162,11 @@
   uint32_t platform = GetPlatform();
   const size_t image_count = image_infos.size();
   for (size_t i = 0; i < image_count; i++) {
-    GetMachOInformationFromMemory(platform, image_infos[i].load_address,
-                                  pointer_size, image_infos[i].macho_info);
+    if (GetMachOInformationFromMemory(platform, image_infos[i].load_address,
+                                      pointer_size,
+                                      image_infos[i].macho_info)) {
+      image_infos[i].is_valid = true;
+    }
   }
     return FormatDynamicLibrariesIntoJSON(image_infos);
 }
@@ -1160,10 +1176,11 @@
 // dyld SPIs that exist in macOS 10.12, iOS 10, tvOS 10, watchOS 3 and newer.
 JSONGenerator::ObjectSP MachProcess::GetLibrariesInfoForAddresses(
     nub_process_t pid, std::vector<uint64_t> &macho_addresses) {
-  JSONGenerator::DictionarySP reply_sp;
 
   int pointer_size = GetInferiorAddrSize(pid);
 
+  // Collect the list of all binaries that dyld knows about in
+  // the inferior process.
   std::vector<struct binary_image_information> all_image_infos;
   GetAllLoadedBinariesViaDYLDSPI(all_image_infos);
   uint32_t platform = GetPlatform();
@@ -1171,19 +1188,35 @@
   std::vector<struct binary_image_information> image_infos;
   const size_t macho_addresses_count = macho_addresses.size();
   const size_t all_image_infos_count = all_image_infos.size();
+
   for (size_t i = 0; i < macho_addresses_count; i++) {
+    bool found_matching_entry = false;
     for (size_t j = 0; j < all_image_infos_count; j++) {
       if (all_image_infos[j].load_address == macho_addresses[i]) {
         image_infos.push_back(all_image_infos[j]);
+        found_matching_entry = true;
       }
     }
+    if (!found_matching_entry) {
+      // dyld doesn't think there is a binary at this address,
+      // but maybe there isn't a binary YET - let's look in memory
+      // for a proper mach-o header etc and return what we can.
+      // We will have an empty filename for the binary (because dyld
+      // doesn't know about it yet) but we can read all of the mach-o
+      // load commands from memory directly.
+      struct binary_image_information entry;
+      entry.load_address = macho_addresses[i];
+      image_infos.push_back(entry);
+    }
   }
 
     const size_t image_infos_count = image_infos.size();
     for (size_t i = 0; i < image_infos_count; i++) {
-      GetMachOInformationFromMemory(platform,
-                                    image_infos[i].load_address, pointer_size,
-                                    image_infos[i].macho_info);
+      if (GetMachOInformationFromMemory(platform, image_infos[i].load_address,
+                                        pointer_size,
+                                        image_infos[i].macho_info)) {
+        image_infos[i].is_valid = true;
+      }
     }
     return FormatDynamicLibrariesIntoJSON(image_infos);
 }
Index: lldb/tools/debugserver/source/MacOSX/MachProcess.h
===================================================================
--- lldb/tools/debugserver/source/MacOSX/MachProcess.h
+++ lldb/tools/debugserver/source/MacOSX/MachProcess.h
@@ -73,9 +73,11 @@
     uint64_t load_address;
     uint64_t mod_date; // may not be available - 0 if so
     struct mach_o_information macho_info;
+    bool is_valid;
 
     binary_image_information()
-        : filename(), load_address(INVALID_NUB_ADDRESS), mod_date(0) {}
+        : filename(), load_address(INVALID_NUB_ADDRESS), mod_date(0),
+          is_valid(false) {}
   };
 
   // Child process control
Index: lldb/test/API/macosx/unregistered-macho/main.c
===================================================================
--- /dev/null
+++ lldb/test/API/macosx/unregistered-macho/main.c
@@ -0,0 +1,63 @@
+#include <mach-o/loader.h>
+#include <mach/machine.h>
+#include <stdlib.h>
+#include <string.h>
+#include <uuid/uuid.h>
+
+int main() {
+  int size_of_load_cmds =
+      sizeof(struct segment_command_64) + sizeof(struct uuid_command);
+  uint8_t *macho_buf =
+      (uint8_t *)malloc(sizeof(struct mach_header_64) + size_of_load_cmds);
+  uint8_t *p = macho_buf;
+  struct mach_header_64 mh;
+  mh.magic = MH_MAGIC_64;
+  mh.cputype = CPU_TYPE_ARM64;
+  mh.cpusubtype = 0;
+  mh.filetype = MH_EXECUTE;
+  mh.ncmds = 2;
+  mh.sizeofcmds = size_of_load_cmds;
+  mh.flags = MH_NOUNDEFS | MH_DYLDLINK | MH_TWOLEVEL | MH_PIE;
+
+  memcpy(p, &mh, sizeof(mh));
+  p += sizeof(mh);
+
+  struct segment_command_64 seg;
+  seg.cmd = LC_SEGMENT_64;
+  seg.cmdsize = sizeof(seg);
+  strcpy(seg.segname, "__TEXT");
+  seg.vmaddr = 0x5000;
+  seg.vmsize = 0x1000;
+  seg.fileoff = 0;
+  seg.filesize = 0;
+  seg.maxprot = 0;
+  seg.initprot = 0;
+  seg.nsects = 0;
+  seg.flags = 0;
+
+  memcpy(p, &seg, sizeof(seg));
+  p += sizeof(seg);
+
+  struct uuid_command uuid;
+  uuid.cmd = LC_UUID;
+  uuid.cmdsize = sizeof(uuid);
+  uuid_clear(uuid.uuid);
+  uuid_parse("1b4e28ba-2fa1-11d2-883f-b9a761bde3fb", uuid.uuid);
+
+  memcpy(p, &uuid, sizeof(uuid));
+  p += sizeof(uuid);
+
+  // If this needs to be debugged, the memory buffer can be written
+  // to a file with
+  // (lldb) mem rea -b -o /tmp/t -c `p - macho_buf` macho_buf
+  // (lldb) platform shell otool -hlv /tmp/t
+  // to verify that it is well formed.
+
+  // And inside lldb, it should be inspectable via
+  // (lldb) script print(lldb.frame.locals["macho_buf"][0].GetValueAsUnsigned())
+  // 105553162403968
+  // (lldb) process plugin packet send
+  // 'jGetLoadedDynamicLibrariesInfos:{"solib_addresses":[105553162403968]}]'
+
+  return 0; // break here
+}
Index: lldb/test/API/macosx/unregistered-macho/TestUnregisteredMacho.py
===================================================================
--- /dev/null
+++ lldb/test/API/macosx/unregistered-macho/TestUnregisteredMacho.py
@@ -0,0 +1,47 @@
+"""Test that debugserver will parse a mach-o in inferior memory even if it's not loaded."""
+
+import os
+import re
+import subprocess
+
+import lldb
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+class TestUnregisteredMacho(TestBase):
+
+    # newer debugserver required for jGetLoadedDynamicLibrariesInfos 
+    # to support this
+    @skipIfOutOfTreeDebugserver  
+    @no_debug_info_test
+    @skipUnlessDarwin
+    def test(self):
+        self.build()
+        target, process, thread, bkpt = lldbutil.run_to_source_breakpoint(
+            self, "// break here", lldb.SBFileSpec("main.c"))
+
+        frame = thread.GetFrameAtIndex(0)
+        macho_buf = frame.GetValueForVariablePath("macho_buf")
+        macho_addr = macho_buf.GetValueAsUnsigned()
+        invalid_macho_addr = macho_buf.GetValueAsUnsigned() + 4
+        gdb_packet = "process plugin packet send 'jGetLoadedDynamicLibrariesInfos:{\"solib_addresses\":[%d]}]'" % macho_addr
+
+        # Send the jGetLoadedDynamicLibrariesInfos packet
+        # to debugserver, asking it to parse the mach-o binary
+        # at this address and give us the UUID etc, even though
+        # dyld doesn't think there is a binary at that address.
+        # We won't get a pathname for the binary (from dyld), but
+        # we will get to the LC_UUID and include that.
+        self.expect (gdb_packet, substrs=['"pathname":""', '"uuid":"1B4E28BA-2FA1-11D2-883F-B9A761BDE3FB"'])
+
+        no_macho_gdb_packet = "process plugin packet send 'jGetLoadedDynamicLibrariesInfos:{\"solib_addresses\":[%d]}]'" % invalid_macho_addr
+        self.expect (no_macho_gdb_packet, substrs=['response: {"images":[]}'])
+
+        # Test that we get back the information for the properly
+        # formatted Mach-O binary in memory, but do not get an
+        # entry for the invalid Mach-O address.
+        both_gdb_packet = "process plugin packet send 'jGetLoadedDynamicLibrariesInfos:{\"solib_addresses\":[%d,%d]}]'" % (macho_addr, invalid_macho_addr)
+        self.expect (both_gdb_packet, substrs=['response: {"images":[{"load_address":%d,' % macho_addr])
+        self.expect (both_gdb_packet, substrs=['response: {"load_address":%d,' % invalid_macho_addr], matching=False)
+
Index: lldb/test/API/macosx/unregistered-macho/Makefile
===================================================================
--- /dev/null
+++ lldb/test/API/macosx/unregistered-macho/Makefile
@@ -0,0 +1,3 @@
+C_SOURCES = main.c
+
+include Makefile.rules
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to