Author: Anthony Ha
Date: 2024-04-18T12:24:24-07:00
New Revision: 22c26fa13d46e174e3d862e5f40cff06d804af0c

URL: 
https://github.com/llvm/llvm-project/commit/22c26fa13d46e174e3d862e5f40cff06d804af0c
DIFF: 
https://github.com/llvm/llvm-project/commit/22c26fa13d46e174e3d862e5f40cff06d804af0c.diff

LOG: [lldb] Skip remote PutFile when MD5 hashes equal (#88812)

This PR adds a check within `PutFile` to exit early when both local and
destination files have matching MD5 hashes. If they differ, or there is
trouble getting the hashes, the regular code path to put the file is
run.

As I needed this to talk to an `lldb-server` which runs the gdb-remote
protocol, I enabled `CalculateMD5` within `Platform/gdb-server` and also
found and fixed a parsing bug within it as well. Before this PR, the
client is incorrectly parsing the response packet containing the
checksum; after this PR, hopefully this is fixed. There is a test for
the parsing behavior included in this PR.

---------

Co-authored-by: Anthony Ha <an...@microsoft.com>

Added: 
    

Modified: 
    lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp
    lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h
    lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
    lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h
    lldb/source/Target/Platform.cpp
    lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationClientTest.cpp

Removed: 
    


################################################################################
diff  --git 
a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp 
b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp
index 88f1ad15b6b485..0dce5add2e3759 100644
--- a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp
+++ b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp
@@ -684,6 +684,14 @@ Status PlatformRemoteGDBServer::RunShellCommand(
                                           signo_ptr, command_output, timeout);
 }
 
+bool PlatformRemoteGDBServer::CalculateMD5(const FileSpec &file_spec,
+                                           uint64_t &low, uint64_t &high) {
+  if (!IsConnected())
+    return false;
+
+  return m_gdb_client_up->CalculateMD5(file_spec, low, high);
+}
+
 void PlatformRemoteGDBServer::CalculateTrapHandlerSymbolNames() {
   m_trap_handlers.push_back(ConstString("_sigtramp"));
 }

diff  --git a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h 
b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h
index 638f7db5ef800c..d83fc386f59427 100644
--- a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h
+++ b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h
@@ -146,6 +146,9 @@ class PlatformRemoteGDBServer : public Platform, private 
UserIDResolver {
 
   void CalculateTrapHandlerSymbolNames() override;
 
+  bool CalculateMD5(const FileSpec &file_spec, uint64_t &low,
+                    uint64_t &high) override;
+
   const lldb::UnixSignalsSP &GetRemoteUnixSignals() override;
 
   size_t ConnectToWaitingProcesses(lldb_private::Debugger &debugger,

diff  --git 
a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp 
b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
index 1f6116967a4f64..7498a070c26094 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
@@ -3419,7 +3419,7 @@ bool GDBRemoteCommunicationClient::GetFileExists(
 }
 
 bool GDBRemoteCommunicationClient::CalculateMD5(
-    const lldb_private::FileSpec &file_spec, uint64_t &high, uint64_t &low) {
+    const lldb_private::FileSpec &file_spec, uint64_t &low, uint64_t &high) {
   std::string path(file_spec.GetPath(false));
   lldb_private::StreamString stream;
   stream.PutCString("vFile:MD5:");
@@ -3433,8 +3433,44 @@ bool GDBRemoteCommunicationClient::CalculateMD5(
       return false;
     if (response.Peek() && *response.Peek() == 'x')
       return false;
-    low = response.GetHexMaxU64(false, UINT64_MAX);
-    high = response.GetHexMaxU64(false, UINT64_MAX);
+
+    // GDBRemoteCommunicationServerCommon::Handle_vFile_MD5 concatenates low 
and
+    // high hex strings. We can't use response.GetHexMaxU64 because that can't
+    // handle the concatenated hex string. What would happen is parsing the low
+    // would consume the whole response packet which would give incorrect
+    // results. Instead, we get the byte string for each low and high hex
+    // separately, and parse them.
+    //
+    // An alternate way to handle this is to change the server to put a
+    // delimiter between the low/high parts, and change the client to parse the
+    // delimiter. However, we choose not to do this so existing lldb-servers
+    // don't have to be patched
+
+    // The checksum is 128 bits encoded as hex
+    // This means low/high are halves of 64 bits each, in otherwords, 8 bytes.
+    // Each byte takes 2 hex characters in the response.
+    const size_t MD5_HALF_LENGTH = sizeof(uint64_t) * 2;
+
+    // Get low part
+    auto part =
+        response.GetStringRef().substr(response.GetFilePos(), MD5_HALF_LENGTH);
+    if (part.size() != MD5_HALF_LENGTH)
+      return false;
+    response.SetFilePos(response.GetFilePos() + part.size());
+
+    if (part.getAsInteger(/*radix=*/16, low))
+      return false;
+
+    // Get high part
+    part =
+        response.GetStringRef().substr(response.GetFilePos(), MD5_HALF_LENGTH);
+    if (part.size() != MD5_HALF_LENGTH)
+      return false;
+    response.SetFilePos(response.GetFilePos() + part.size());
+
+    if (part.getAsInteger(/*radix=*/16, high))
+      return false;
+
     return true;
   }
   return false;

diff  --git 
a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h 
b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h
index bd2d3e232b4645..4be7eb00f42b97 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h
@@ -392,7 +392,7 @@ class GDBRemoteCommunicationClient : public 
GDBRemoteClientBase {
           *command_output, // Pass nullptr if you don't want the command output
       const Timeout<std::micro> &timeout);
 
-  bool CalculateMD5(const FileSpec &file_spec, uint64_t &high, uint64_t &low);
+  bool CalculateMD5(const FileSpec &file_spec, uint64_t &low, uint64_t &high);
 
   lldb::DataBufferSP ReadRegister(
       lldb::tid_t tid,

diff  --git a/lldb/source/Target/Platform.cpp b/lldb/source/Target/Platform.cpp
index 4ce290dfbe035f..91483ba008f4a3 100644
--- a/lldb/source/Target/Platform.cpp
+++ b/lldb/source/Target/Platform.cpp
@@ -1197,6 +1197,32 @@ Status Platform::PutFile(const FileSpec &source, const 
FileSpec &destination,
   if (!source_file)
     return Status(source_file.takeError());
   Status error;
+
+  bool requires_upload = true;
+  uint64_t dest_md5_low, dest_md5_high;
+  bool success = CalculateMD5(destination, dest_md5_low, dest_md5_high);
+  if (!success) {
+    LLDB_LOGF(log, "[PutFile] couldn't get md5 sum of destination");
+  } else {
+    auto local_md5 = llvm::sys::fs::md5_contents(source.GetPath());
+    if (!local_md5) {
+      LLDB_LOGF(log, "[PutFile] couldn't get md5 sum of source");
+    } else {
+      const auto [local_md5_high, local_md5_low] = local_md5->words();
+      LLDB_LOGF(log, "[PutFile] destination md5: %016" PRIx64 "%016" PRIx64,
+                dest_md5_high, dest_md5_low);
+      LLDB_LOGF(log, "[PutFile]       local md5: %016" PRIx64 "%016" PRIx64,
+                local_md5_high, local_md5_low);
+      requires_upload =
+          local_md5_high != dest_md5_high || local_md5_low != dest_md5_low;
+    }
+  }
+
+  if (!requires_upload) {
+    LLDB_LOGF(log, "[PutFile] skipping PutFile because md5sums match");
+    return error;
+  }
+
   uint32_t permissions = source_file.get()->GetPermissions(error);
   if (permissions == 0)
     permissions = lldb::eFilePermissionsFileDefault;

diff  --git 
a/lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationClientTest.cpp 
b/lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationClientTest.cpp
index f93cf8ce679c5b..6b11ec43a65dd8 100644
--- a/lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationClientTest.cpp
+++ b/lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationClientTest.cpp
@@ -592,3 +592,26 @@ TEST_F(GDBRemoteCommunicationClientTest, WriteMemoryTags) {
                  std::vector<uint8_t>{0x99}, "QMemTags:456789,0:80000000:99",
                  "E03", false);
 }
+
+TEST_F(GDBRemoteCommunicationClientTest, CalculateMD5) {
+  FileSpec file_spec("/foo/bar", FileSpec::Style::posix);
+  uint64_t low, high;
+  std::future<bool> async_result = std::async(std::launch::async, [&] {
+    return client.CalculateMD5(file_spec, low, high);
+  });
+
+  lldb_private::StreamString stream;
+  stream.PutCString("vFile:MD5:");
+  stream.PutStringAsRawHex8(file_spec.GetPath(false));
+  HandlePacket(server, stream.GetString().str(),
+               "F,"
+               "deadbeef01020304"
+               "05060708deadbeef");
+  ASSERT_TRUE(async_result.get());
+
+  // Server and client puts/parses low, and then high
+  const uint64_t expected_low = 0xdeadbeef01020304;
+  const uint64_t expected_high = 0x05060708deadbeef;
+  EXPECT_EQ(expected_low, low);
+  EXPECT_EQ(expected_high, high);
+}


        
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to