arames created this revision. Herald added subscribers: dexonsmith, usaxena95, kadircet, arphaman, hiraditya. arames requested review of this revision. Herald added projects: clang, LLVM, clang-tools-extra. Herald added subscribers: cfe-commits, llvm-commits.
`size_t` varying across platforms can cause issues, for example mismatching hashes when cross-compiling modules from a 64bit target to a 32bit target. Although the comments note that `hash_code` values "are not stable to save or persist", it is effectively used in such cases today, for example for implicit module hashes. Since hashing mechanisms in practice already use `uint64_t` for computations, use `uint64_t` instead of `size_t` to store the value of `hash_code`. This is similar to earlier changes in c0445098519defc4bd13624095fac92977c9cfe4. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D102943 Files: clang-tools-extra/clangd/index/SymbolID.cpp clang/include/clang/AST/DataCollection.h clang/lib/Lex/HeaderSearch.cpp llvm/include/llvm/ADT/Hashing.h llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp llvm/unittests/ADT/HashingTest.cpp
Index: llvm/unittests/ADT/HashingTest.cpp =================================================================== --- llvm/unittests/ADT/HashingTest.cpp +++ llvm/unittests/ADT/HashingTest.cpp @@ -22,7 +22,7 @@ // Helper for test code to print hash codes. void PrintTo(const hash_code &code, std::ostream *os) { - *os << static_cast<size_t>(code); + *os << static_cast<uint64_t>(code); } // Fake an object that is recognized as hashable data to test super large @@ -134,7 +134,7 @@ // Provide a dummy, hashable type designed for easy verification: its hash is // the same as its value. -struct HashableDummy { size_t value; }; +struct HashableDummy { uint64_t value; }; hash_code hash_value(HashableDummy dummy) { return dummy.value; } TEST(HashingTest, HashCombineRangeBasicTest) { @@ -172,7 +172,7 @@ EXPECT_NE(dummy_hash, arr4_hash); EXPECT_NE(arr1_hash, arr4_hash); - const size_t arr5[] = { 1, 2, 3 }; + const uint64_t arr5[] = { 1, 2, 3 }; const HashableDummy d_arr5[] = { {1}, {2}, {3} }; hash_code arr5_hash = hash_combine_range(begin(arr5), end(arr5)); hash_code d_arr5_hash = hash_combine_range(begin(d_arr5), end(d_arr5)); @@ -182,11 +182,11 @@ TEST(HashingTest, HashCombineRangeLengthDiff) { // Test that as only the length varies, we compute different hash codes for // sequences. - std::map<size_t, size_t> code_to_size; + std::map<uint64_t, size_t> code_to_size; std::vector<char> all_one_c(256, '\xff'); for (unsigned Idx = 1, Size = all_one_c.size(); Idx < Size; ++Idx) { hash_code code = hash_combine_range(&all_one_c[0], &all_one_c[0] + Idx); - std::map<size_t, size_t>::iterator + std::map<uint64_t, size_t>::iterator I = code_to_size.insert(std::make_pair(code, Idx)).first; EXPECT_EQ(Idx, I->second); } @@ -194,7 +194,7 @@ std::vector<char> all_zero_c(256, '\0'); for (unsigned Idx = 1, Size = all_zero_c.size(); Idx < Size; ++Idx) { hash_code code = hash_combine_range(&all_zero_c[0], &all_zero_c[0] + Idx); - std::map<size_t, size_t>::iterator + std::map<uint64_t, size_t>::iterator I = code_to_size.insert(std::make_pair(code, Idx)).first; EXPECT_EQ(Idx, I->second); } @@ -202,7 +202,7 @@ std::vector<unsigned> all_one_int(512, -1); for (unsigned Idx = 1, Size = all_one_int.size(); Idx < Size; ++Idx) { hash_code code = hash_combine_range(&all_one_int[0], &all_one_int[0] + Idx); - std::map<size_t, size_t>::iterator + std::map<uint64_t, size_t>::iterator I = code_to_size.insert(std::make_pair(code, Idx)).first; EXPECT_EQ(Idx, I->second); } @@ -210,7 +210,7 @@ std::vector<unsigned> all_zero_int(512, 0); for (unsigned Idx = 1, Size = all_zero_int.size(); Idx < Size; ++Idx) { hash_code code = hash_combine_range(&all_zero_int[0], &all_zero_int[0] + Idx); - std::map<size_t, size_t>::iterator + std::map<uint64_t, size_t>::iterator I = code_to_size.insert(std::make_pair(code, Idx)).first; EXPECT_EQ(Idx, I->second); } @@ -283,8 +283,7 @@ fprintf(stderr, " { %-35s 0x%016llxULL },\n", member_str.c_str(), static_cast<uint64_t>(hash)); #endif - EXPECT_EQ(static_cast<size_t>(golden_data[i].hash), - static_cast<size_t>(hash)); + EXPECT_EQ(golden_data[i].hash, static_cast<uint64_t>(hash)); } } @@ -304,9 +303,9 @@ // Hashing a sequence of heterogeneous types which *happen* to all produce the // same data for hashing produces the same as a range-based hash of the // fundamental values. - const size_t s1 = 1024, s2 = 8888, s3 = 9000000; + const uint64_t s1 = 1024, s2 = 8888, s3 = 9000000; const HashableDummy d1 = { 1024 }, d2 = { 8888 }, d3 = { 9000000 }; - const size_t arr2[] = { s1, s2, s3 }; + const uint64_t arr2[] = { s1, s2, s3 }; EXPECT_EQ(hash_combine_range(begin(arr2), end(arr2)), hash_combine(s1, s2, s3)); EXPECT_EQ(hash_combine(s1, s2, s3), hash_combine(s1, s2, d3)); Index: llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp =================================================================== --- llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp +++ llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp @@ -345,10 +345,8 @@ HC = hash_combine(HC, hash_combine_range(GVName.begin(), GVName.end())); } raw_string_ostream(SubModuleName) - << ".submodule." - << formatv(sizeof(size_t) == 8 ? "{0:x16}" : "{0:x8}", - static_cast<size_t>(HC)) - << ".ll"; + << ".submodule." << formatv("{0:x16}", static_cast<uint64_t>(HC)) + << ".ll"; } // Extract the requested partiton (plus any necessary aliases) and Index: llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp +++ llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp @@ -304,7 +304,7 @@ unsigned NumBreakDowns) { if (LLVM_LIKELY(NumBreakDowns == 1)) return hash_value(*BreakDown); - SmallVector<size_t, 8> Hashes(NumBreakDowns); + SmallVector<uint64_t, 8> Hashes(NumBreakDowns); for (unsigned Idx = 0; Idx != NumBreakDowns; ++Idx) Hashes.push_back(hash_value(BreakDown[Idx])); return hash_combine_range(Hashes.begin(), Hashes.end()); Index: llvm/include/llvm/ADT/Hashing.h =================================================================== --- llvm/include/llvm/ADT/Hashing.h +++ llvm/include/llvm/ADT/Hashing.h @@ -70,7 +70,7 @@ /// llvm::hash_code code = hash_value(x); /// \endcode class hash_code { - size_t value; + uint64_t value; public: /// Default construct a hash_code. @@ -78,10 +78,10 @@ hash_code() = default; /// Form a hash code directly from a numerical value. - hash_code(size_t value) : value(value) {} + hash_code(uint64_t value) : value(value) {} /// Convert the hash code to its numerical value for use. - /*explicit*/ operator size_t() const { return value; } + /*explicit*/ operator uint64_t() const { return value; } friend bool operator==(const hash_code &lhs, const hash_code &rhs) { return lhs.value == rhs.value; @@ -91,7 +91,7 @@ } /// Allow a hash_code to be directly run through hash_value. - friend size_t hash_value(const hash_code &code) { return code.value; } + friend uint64_t hash_value(const hash_code &code) { return code.value; } }; /// Compute a hash_code for any integer value. @@ -372,7 +372,7 @@ /// This variant is enabled when we must first call hash_value and use the /// result as our data. template <typename T> -std::enable_if_t<!is_hashable_data<T>::value, size_t> +std::enable_if_t<!is_hashable_data<T>::value, uint64_t> get_hashable_data(const T &value) { using ::llvm::hash_value; return hash_value(value); Index: clang/lib/Lex/HeaderSearch.cpp =================================================================== --- clang/lib/Lex/HeaderSearch.cpp +++ clang/lib/Lex/HeaderSearch.cpp @@ -223,7 +223,7 @@ llvm::hash_combine(DirName.lower(), FileName.lower()); SmallString<128> HashStr; - llvm::APInt(64, size_t(Hash)).toStringUnsigned(HashStr, /*Radix*/36); + llvm::APInt(64, uint64_t(Hash)).toStringUnsigned(HashStr, /*Radix*/36); llvm::sys::path::append(Result, ModuleName + "-" + HashStr + ".pcm"); } return Result.str().str(); Index: clang/include/clang/AST/DataCollection.h =================================================================== --- clang/include/clang/AST/DataCollection.h +++ clang/include/clang/AST/DataCollection.h @@ -51,7 +51,8 @@ template <class T, class Type> std::enable_if_t<std::is_integral<Type>::value || std::is_enum<Type>::value || - std::is_convertible<Type, size_t>::value // for llvm::hash_code + std::is_convertible<Type, uint64_t>::value // for + // llvm::hash_code > addDataToConsumer(T &DataConsumer, Type Data) { DataConsumer.update(StringRef(reinterpret_cast<char *>(&Data), sizeof(Data))); Index: clang-tools-extra/clangd/index/SymbolID.cpp =================================================================== --- clang-tools-extra/clangd/index/SymbolID.cpp +++ clang-tools-extra/clangd/index/SymbolID.cpp @@ -48,10 +48,10 @@ llvm::hash_code hash_value(const SymbolID &ID) { // We already have a good hash, just return the first bytes. - static_assert(sizeof(size_t) <= SymbolID::RawSize, - "size_t longer than SHA1!"); - size_t Result; - memcpy(&Result, ID.raw().data(), sizeof(size_t)); + static_assert(sizeof(uint64_t) <= SymbolID::RawSize, + "uint64_t longer than SHA1!"); + uint64_t Result; + memcpy(&Result, ID.raw().data(), sizeof(uint64_t)); return llvm::hash_code(Result); }
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits