llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang-modules

Author: Fangrui Song (MaskRay)

<details>
<summary>Changes</summary>

clangSerialization currently uses hash_combine/hash_value from
Hashing.h, which are not guaranteed to be deterministic.
Replace these uses with xxh3_64bits.


---
Full diff: https://github.com/llvm/llvm-project/pull/96136.diff


4 Files Affected:

- (modified) clang/lib/AST/ODRHash.cpp (+1-1) 
- (modified) clang/lib/Serialization/ASTReader.cpp (+6-4) 
- (modified) clang/lib/Serialization/ASTWriter.cpp (+5-2) 
- (modified) llvm/include/llvm/ADT/FoldingSet.h (+19-4) 


``````````diff
diff --git a/clang/lib/AST/ODRHash.cpp b/clang/lib/AST/ODRHash.cpp
index 1249531eab09f..fbfe92318dc5e 100644
--- a/clang/lib/AST/ODRHash.cpp
+++ b/clang/lib/AST/ODRHash.cpp
@@ -251,7 +251,7 @@ unsigned ODRHash::CalculateHash() {
 
   assert(I == Bools.rend());
   Bools.clear();
-  return ID.ComputeHash();
+  return ID.computeStableHash();
 }
 
 namespace {
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 973475cf56b8c..0bf8e416aff4a 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -1197,7 +1197,7 @@ unsigned DeclarationNameKey::getHash() const {
     break;
   }
 
-  return ID.ComputeHash();
+  return ID.computeStableHash();
 }
 
 ModuleFile *
@@ -2029,7 +2029,10 @@ const FileEntry *HeaderFileInfoTrait::getFile(const 
internal_key_type &Key) {
 }
 
 unsigned HeaderFileInfoTrait::ComputeHash(internal_key_ref ikey) {
-  return llvm::hash_combine(ikey.Size, ikey.ModTime);
+  uint8_t buf[sizeof(ikey.Size) + sizeof(ikey.ModTime)];
+  memcpy(buf, &ikey.Size, sizeof(ikey.Size));
+  memcpy(buf + sizeof(ikey.Size), &ikey.ModTime, sizeof(ikey.ModTime));
+  return llvm::xxh3_64bits(buf);
 }
 
 HeaderFileInfoTrait::internal_key_type
@@ -2636,8 +2639,7 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned 
ID, bool Complain) {
       return OriginalChange;
     }
 
-    // FIXME: hash_value is not guaranteed to be stable!
-    auto ContentHash = hash_value(MemBuffOrError.get()->getBuffer());
+    auto ContentHash = xxh3_64bits(MemBuffOrError.get()->getBuffer());
     if (StoredContentHash == static_cast<uint64_t>(ContentHash))
       return Change{Change::None};
 
diff --git a/clang/lib/Serialization/ASTWriter.cpp 
b/clang/lib/Serialization/ASTWriter.cpp
index 50fa44d34f524..546ebcc7b9a19 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -1782,7 +1782,7 @@ void ASTWriter::WriteInputFiles(SourceManager &SourceMgr,
             .ValidateASTInputFilesContent) {
       auto MemBuff = Cache->getBufferIfLoaded();
       if (MemBuff)
-        ContentHash = hash_value(MemBuff->getBuffer());
+        ContentHash = xxh3_64bits(MemBuff->getBuffer());
       else
         PP->Diag(SourceLocation(), diag::err_module_unable_to_hash_content)
             << Entry.File.getName();
@@ -1987,7 +1987,10 @@ namespace {
       // The hash is based only on size/time of the file, so that the reader 
can
       // match even when symlinking or excess path elements ("foo/../", "../")
       // change the form of the name. However, complete path is still the key.
-      return llvm::hash_combine(key.Size, key.ModTime);
+      uint8_t buf[sizeof(key.Size) + sizeof(key.ModTime)];
+      memcpy(buf, &key.Size, sizeof(key.Size));
+      memcpy(buf + sizeof(key.Size), &key.ModTime, sizeof(key.ModTime));
+      return llvm::xxh3_64bits(buf);
     }
 
     std::pair<unsigned, unsigned>
diff --git a/llvm/include/llvm/ADT/FoldingSet.h 
b/llvm/include/llvm/ADT/FoldingSet.h
index f82eabd5044b2..3c2eaade57e47 100644
--- a/llvm/include/llvm/ADT/FoldingSet.h
+++ b/llvm/include/llvm/ADT/FoldingSet.h
@@ -21,6 +21,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/iterator.h"
 #include "llvm/Support/Allocator.h"
+#include "llvm/Support/xxhash.h"
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
@@ -294,12 +295,19 @@ class FoldingSetNodeIDRef {
   FoldingSetNodeIDRef() = default;
   FoldingSetNodeIDRef(const unsigned *D, size_t S) : Data(D), Size(S) {}
 
-  /// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef,
-  /// used to lookup the node in the FoldingSetBase.
+  // Compute a strong hash value used to lookup the node in the FoldingSetBase.
+  // The hash value is not guaranteed to be deterministic across processes.
   unsigned ComputeHash() const {
     return static_cast<unsigned>(hash_combine_range(Data, Data + Size));
   }
 
+  // Compute a deterministic hash value across processes that is suitable for
+  // on-disk serialization.
+  unsigned computeStableHash() const {
+    return static_cast<unsigned>(xxh3_64bits(ArrayRef(
+        reinterpret_cast<const uint8_t *>(Data), sizeof(unsigned) * Size)));
+  }
+
   bool operator==(FoldingSetNodeIDRef) const;
 
   bool operator!=(FoldingSetNodeIDRef RHS) const { return !(*this == RHS); }
@@ -366,12 +374,19 @@ class FoldingSetNodeID {
   /// object to be used to compute a new profile.
   inline void clear() { Bits.clear(); }
 
-  /// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used
-  /// to lookup the node in the FoldingSetBase.
+  // Compute a strong hash value for this FoldingSetNodeID, used to lookup the
+  // node in the FoldingSetBase. The hash value is not guaranteed to be
+  // deterministic across processes.
   unsigned ComputeHash() const {
     return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash();
   }
 
+  // Compute a deterministic hash value across processes that is suitable for
+  // on-disk serialization.
+  unsigned computeStableHash() const {
+    return FoldingSetNodeIDRef(Bits.data(), Bits.size()).computeStableHash();
+  }
+
   /// operator== - Used to compare two nodes to each other.
   bool operator==(const FoldingSetNodeID &RHS) const;
   bool operator==(const FoldingSetNodeIDRef RHS) const;

``````````

</details>


https://github.com/llvm/llvm-project/pull/96136
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to