arames created this revision.
Herald added subscribers: dexonsmith, usaxena95, kadircet, arphaman, hiraditya.
arames requested review of this revision.
Herald added projects: clang, LLVM, clang-tools-extra.
Herald added subscribers: cfe-commits, llvm-commits.

`size_t` varying across platforms can cause issues, for example mismatching
hashes when cross-compiling modules from a 64bit target to a 32bit target.

Although the comments note that `hash_code` values "are not stable to save or
persist", it is effectively used in such cases today, for example for implicit
module hashes.

Since hashing mechanisms in practice already use `uint64_t` for computations,
use `uint64_t` instead of `size_t` to store the value of `hash_code`.
This is similar to earlier changes in c0445098519defc4bd13624095fac92977c9cfe4.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D102943

Files:
  clang-tools-extra/clangd/index/SymbolID.cpp
  clang/include/clang/AST/DataCollection.h
  clang/lib/Lex/HeaderSearch.cpp
  llvm/include/llvm/ADT/Hashing.h
  llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
  llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
  llvm/unittests/ADT/HashingTest.cpp

Index: llvm/unittests/ADT/HashingTest.cpp
===================================================================
--- llvm/unittests/ADT/HashingTest.cpp
+++ llvm/unittests/ADT/HashingTest.cpp
@@ -22,7 +22,7 @@
 
 // Helper for test code to print hash codes.
 void PrintTo(const hash_code &code, std::ostream *os) {
-  *os << static_cast<size_t>(code);
+  *os << static_cast<uint64_t>(code);
 }
 
 // Fake an object that is recognized as hashable data to test super large
@@ -134,7 +134,7 @@
 
 // Provide a dummy, hashable type designed for easy verification: its hash is
 // the same as its value.
-struct HashableDummy { size_t value; };
+struct HashableDummy { uint64_t value; };
 hash_code hash_value(HashableDummy dummy) { return dummy.value; }
 
 TEST(HashingTest, HashCombineRangeBasicTest) {
@@ -172,7 +172,7 @@
   EXPECT_NE(dummy_hash, arr4_hash);
   EXPECT_NE(arr1_hash, arr4_hash);
 
-  const size_t arr5[] = { 1, 2, 3 };
+  const uint64_t arr5[] = { 1, 2, 3 };
   const HashableDummy d_arr5[] = { {1}, {2}, {3} };
   hash_code arr5_hash = hash_combine_range(begin(arr5), end(arr5));
   hash_code d_arr5_hash = hash_combine_range(begin(d_arr5), end(d_arr5));
@@ -182,11 +182,11 @@
 TEST(HashingTest, HashCombineRangeLengthDiff) {
   // Test that as only the length varies, we compute different hash codes for
   // sequences.
-  std::map<size_t, size_t> code_to_size;
+  std::map<uint64_t, size_t> code_to_size;
   std::vector<char> all_one_c(256, '\xff');
   for (unsigned Idx = 1, Size = all_one_c.size(); Idx < Size; ++Idx) {
     hash_code code = hash_combine_range(&all_one_c[0], &all_one_c[0] + Idx);
-    std::map<size_t, size_t>::iterator
+    std::map<uint64_t, size_t>::iterator
       I = code_to_size.insert(std::make_pair(code, Idx)).first;
     EXPECT_EQ(Idx, I->second);
   }
@@ -194,7 +194,7 @@
   std::vector<char> all_zero_c(256, '\0');
   for (unsigned Idx = 1, Size = all_zero_c.size(); Idx < Size; ++Idx) {
     hash_code code = hash_combine_range(&all_zero_c[0], &all_zero_c[0] + Idx);
-    std::map<size_t, size_t>::iterator
+    std::map<uint64_t, size_t>::iterator
       I = code_to_size.insert(std::make_pair(code, Idx)).first;
     EXPECT_EQ(Idx, I->second);
   }
@@ -202,7 +202,7 @@
   std::vector<unsigned> all_one_int(512, -1);
   for (unsigned Idx = 1, Size = all_one_int.size(); Idx < Size; ++Idx) {
     hash_code code = hash_combine_range(&all_one_int[0], &all_one_int[0] + Idx);
-    std::map<size_t, size_t>::iterator
+    std::map<uint64_t, size_t>::iterator
       I = code_to_size.insert(std::make_pair(code, Idx)).first;
     EXPECT_EQ(Idx, I->second);
   }
@@ -210,7 +210,7 @@
   std::vector<unsigned> all_zero_int(512, 0);
   for (unsigned Idx = 1, Size = all_zero_int.size(); Idx < Size; ++Idx) {
     hash_code code = hash_combine_range(&all_zero_int[0], &all_zero_int[0] + Idx);
-    std::map<size_t, size_t>::iterator
+    std::map<uint64_t, size_t>::iterator
       I = code_to_size.insert(std::make_pair(code, Idx)).first;
     EXPECT_EQ(Idx, I->second);
   }
@@ -283,8 +283,7 @@
     fprintf(stderr, " { %-35s 0x%016llxULL },\n",
             member_str.c_str(), static_cast<uint64_t>(hash));
 #endif
-    EXPECT_EQ(static_cast<size_t>(golden_data[i].hash),
-              static_cast<size_t>(hash));
+    EXPECT_EQ(golden_data[i].hash, static_cast<uint64_t>(hash));
   }
 }
 
@@ -304,9 +303,9 @@
   // Hashing a sequence of heterogeneous types which *happen* to all produce the
   // same data for hashing produces the same as a range-based hash of the
   // fundamental values.
-  const size_t s1 = 1024, s2 = 8888, s3 = 9000000;
+  const uint64_t s1 = 1024, s2 = 8888, s3 = 9000000;
   const HashableDummy d1 = { 1024 }, d2 = { 8888 }, d3 = { 9000000 };
-  const size_t arr2[] = { s1, s2, s3 };
+  const uint64_t arr2[] = { s1, s2, s3 };
   EXPECT_EQ(hash_combine_range(begin(arr2), end(arr2)),
             hash_combine(s1, s2, s3));
   EXPECT_EQ(hash_combine(s1, s2, s3), hash_combine(s1, s2, d3));
Index: llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
===================================================================
--- llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
+++ llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
@@ -345,10 +345,8 @@
             HC = hash_combine(HC, hash_combine_range(GVName.begin(), GVName.end()));
           }
           raw_string_ostream(SubModuleName)
-            << ".submodule."
-            << formatv(sizeof(size_t) == 8 ? "{0:x16}" : "{0:x8}",
-                       static_cast<size_t>(HC))
-            << ".ll";
+              << ".submodule." << formatv("{0:x16}", static_cast<uint64_t>(HC))
+              << ".ll";
         }
 
         // Extract the requested partiton (plus any necessary aliases) and
Index: llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -304,7 +304,7 @@
                  unsigned NumBreakDowns) {
   if (LLVM_LIKELY(NumBreakDowns == 1))
     return hash_value(*BreakDown);
-  SmallVector<size_t, 8> Hashes(NumBreakDowns);
+  SmallVector<uint64_t, 8> Hashes(NumBreakDowns);
   for (unsigned Idx = 0; Idx != NumBreakDowns; ++Idx)
     Hashes.push_back(hash_value(BreakDown[Idx]));
   return hash_combine_range(Hashes.begin(), Hashes.end());
Index: llvm/include/llvm/ADT/Hashing.h
===================================================================
--- llvm/include/llvm/ADT/Hashing.h
+++ llvm/include/llvm/ADT/Hashing.h
@@ -70,7 +70,7 @@
 ///   llvm::hash_code code = hash_value(x);
 /// \endcode
 class hash_code {
-  size_t value;
+  uint64_t value;
 
 public:
   /// Default construct a hash_code.
@@ -78,10 +78,10 @@
   hash_code() = default;
 
   /// Form a hash code directly from a numerical value.
-  hash_code(size_t value) : value(value) {}
+  hash_code(uint64_t value) : value(value) {}
 
   /// Convert the hash code to its numerical value for use.
-  /*explicit*/ operator size_t() const { return value; }
+  /*explicit*/ operator uint64_t() const { return value; }
 
   friend bool operator==(const hash_code &lhs, const hash_code &rhs) {
     return lhs.value == rhs.value;
@@ -91,7 +91,7 @@
   }
 
   /// Allow a hash_code to be directly run through hash_value.
-  friend size_t hash_value(const hash_code &code) { return code.value; }
+  friend uint64_t hash_value(const hash_code &code) { return code.value; }
 };
 
 /// Compute a hash_code for any integer value.
@@ -372,7 +372,7 @@
 /// This variant is enabled when we must first call hash_value and use the
 /// result as our data.
 template <typename T>
-std::enable_if_t<!is_hashable_data<T>::value, size_t>
+std::enable_if_t<!is_hashable_data<T>::value, uint64_t>
 get_hashable_data(const T &value) {
   using ::llvm::hash_value;
   return hash_value(value);
Index: clang/lib/Lex/HeaderSearch.cpp
===================================================================
--- clang/lib/Lex/HeaderSearch.cpp
+++ clang/lib/Lex/HeaderSearch.cpp
@@ -223,7 +223,7 @@
       llvm::hash_combine(DirName.lower(), FileName.lower());
 
     SmallString<128> HashStr;
-    llvm::APInt(64, size_t(Hash)).toStringUnsigned(HashStr, /*Radix*/36);
+    llvm::APInt(64, uint64_t(Hash)).toStringUnsigned(HashStr, /*Radix*/36);
     llvm::sys::path::append(Result, ModuleName + "-" + HashStr + ".pcm");
   }
   return Result.str().str();
Index: clang/include/clang/AST/DataCollection.h
===================================================================
--- clang/include/clang/AST/DataCollection.h
+++ clang/include/clang/AST/DataCollection.h
@@ -51,7 +51,8 @@
 
 template <class T, class Type>
 std::enable_if_t<std::is_integral<Type>::value || std::is_enum<Type>::value ||
-                 std::is_convertible<Type, size_t>::value // for llvm::hash_code
+                 std::is_convertible<Type, uint64_t>::value // for
+                                                            // llvm::hash_code
                  >
 addDataToConsumer(T &DataConsumer, Type Data) {
   DataConsumer.update(StringRef(reinterpret_cast<char *>(&Data), sizeof(Data)));
Index: clang-tools-extra/clangd/index/SymbolID.cpp
===================================================================
--- clang-tools-extra/clangd/index/SymbolID.cpp
+++ clang-tools-extra/clangd/index/SymbolID.cpp
@@ -48,10 +48,10 @@
 
 llvm::hash_code hash_value(const SymbolID &ID) {
   // We already have a good hash, just return the first bytes.
-  static_assert(sizeof(size_t) <= SymbolID::RawSize,
-                "size_t longer than SHA1!");
-  size_t Result;
-  memcpy(&Result, ID.raw().data(), sizeof(size_t));
+  static_assert(sizeof(uint64_t) <= SymbolID::RawSize,
+                "uint64_t longer than SHA1!");
+  uint64_t Result;
+  memcpy(&Result, ID.raw().data(), sizeof(uint64_t));
   return llvm::hash_code(Result);
 }
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to