Author: Martin Storsjö Date: 2026-05-29T17:36:22+03:00 New Revision: 7aa76e1f19ed6cfa046f87b5abe8f5034ac6edd3
URL: https://github.com/llvm/llvm-project/commit/7aa76e1f19ed6cfa046f87b5abe8f5034ac6edd3 DIFF: https://github.com/llvm/llvm-project/commit/7aa76e1f19ed6cfa046f87b5abe8f5034ac6edd3.diff LOG: Revert "[DenseMap] Replace tombstone deletion with TAOCP 6.4 Algorithm R (#19…" This reverts commit c4d820ce311b74acf70e52ec04856fc89b503ab9. Added: Modified: llvm/include/llvm/ADT/DenseMap.h llvm/lib/IR/Value.cpp llvm/unittests/ADT/BitVectorTest.cpp Removed: ################################################################################ diff --git a/llvm/include/llvm/ADT/DenseMap.h b/llvm/include/llvm/ADT/DenseMap.h index b52906a215a7c..081d7e198e94c 100644 --- a/llvm/include/llvm/ADT/DenseMap.h +++ b/llvm/include/llvm/ADT/DenseMap.h @@ -53,10 +53,6 @@ struct DenseMapPair : std::pair<KeyT, ValueT> { } // end namespace detail -// Befriended below so DenseMapBase can expose its bucket-relocation callback -// erase to ValueHandleBase, the only caller that caches bucket pointers. -class ValueHandleBase; - template <typename KeyT, typename ValueT, typename KeyInfoT = DenseMapInfo<KeyT>, typename Bucket = llvm::detail::DenseMapPair<KeyT, ValueT>, @@ -124,7 +120,7 @@ class DenseMapBase : public DebugEpochBase { void clear() { incrementEpoch(); - if (getNumEntries() == 0) + if (getNumEntries() == 0 && getNumTombstones() == 0) return; // If the capacity of the array is huge, and the # elements used is small, @@ -140,11 +136,14 @@ class DenseMapBase : public DebugEpochBase { for (BucketT &B : buckets()) B.getFirst() = EmptyKey; } else { + const KeyT TombstoneKey = KeyInfoT::getTombstoneKey(); unsigned NumEntries = getNumEntries(); for (BucketT &B : buckets()) { if (!KeyInfoT::isEqual(B.getFirst(), EmptyKey)) { - B.getSecond().~ValueT(); - --NumEntries; + if (!KeyInfoT::isEqual(B.getFirst(), TombstoneKey)) { + B.getSecond().~ValueT(); + --NumEntries; + } B.getFirst() = EmptyKey; } } @@ -152,6 +151,7 @@ class DenseMapBase : public DebugEpochBase { (void)NumEntries; } setNumEntries(0); + setNumTombstones(0); } void shrink_and_clear() { @@ -325,19 +325,26 @@ class DenseMapBase : public DebugEpochBase { return Ret; } - void eraseFromFilledBucket(BucketT *TheBucket) { - eraseFromFilledBucket(TheBucket, [](BucketT &) {}); - } - bool erase(const KeyT &Val) { BucketT *TheBucket = doFind(Val); if (!TheBucket) return false; // not in map. - eraseFromFilledBucket(TheBucket); + incrementEpoch(); + TheBucket->getSecond().~ValueT(); + TheBucket->getFirst() = KeyInfoT::getTombstoneKey(); + decrementNumEntries(); + incrementNumTombstones(); return true; } - void erase(iterator I) { eraseFromFilledBucket(&*I); } + void erase(iterator I) { + BucketT *TheBucket = &*I; + incrementEpoch(); + TheBucket->getSecond().~ValueT(); + TheBucket->getFirst() = KeyInfoT::getTombstoneKey(); + decrementNumEntries(); + incrementNumTombstones(); + } /// Remove entries that match the given predicate. \p Pred is invoked /// with a reference to each live bucket and must not access the map being @@ -347,22 +354,22 @@ class DenseMapBase : public DebugEpochBase { /// into the map are invalidated. template <typename Predicate> bool remove_if(Predicate Pred) { const KeyT EmptyKey = KeyInfoT::getEmptyKey(); - unsigned NumBuckets = getNumBuckets(); + const KeyT TombstoneKey = KeyInfoT::getTombstoneKey(); bool Removed = false; for (BucketT &B : buckets()) { - if (KeyInfoT::isEqual(B.getFirst(), EmptyKey)) + if (KeyInfoT::isEqual(B.getFirst(), EmptyKey) || + KeyInfoT::isEqual(B.getFirst(), TombstoneKey)) continue; if (Pred(B)) { B.getSecond().~ValueT(); - B.getFirst() = EmptyKey; + B.getFirst() = TombstoneKey; decrementNumEntries(); + incrementNumTombstones(); Removed = true; } } - if (Removed) { + if (Removed) incrementEpoch(); - this->grow(NumBuckets); - } return Removed; } @@ -399,10 +406,12 @@ class DenseMapBase : public DebugEpochBase { struct ExactBucketCount {}; void initWithExactBucketCount(unsigned NewNumBuckets) { - if (derived().allocateBuckets(NewNumBuckets)) + if (derived().allocateBuckets(NewNumBuckets)) { initEmpty(); - else + } else { setNumEntries(0); + setNumTombstones(0); + } } void destroyAll() { @@ -416,8 +425,10 @@ class DenseMapBase : public DebugEpochBase { return; const KeyT EmptyKey = KeyInfoT::getEmptyKey(); + const KeyT TombstoneKey = KeyInfoT::getTombstoneKey(); for (BucketT &B : buckets()) { - if (!KeyInfoT::isEqual(B.getFirst(), EmptyKey)) + if (!KeyInfoT::isEqual(B.getFirst(), EmptyKey) && + !KeyInfoT::isEqual(B.getFirst(), TombstoneKey)) B.getSecond().~ValueT(); B.getFirst().~KeyT(); } @@ -427,6 +438,7 @@ class DenseMapBase : public DebugEpochBase { static_assert(std::is_base_of_v<DenseMapBase, DerivedT>, "Must pass the derived type to this template!"); setNumEntries(0); + setNumTombstones(0); assert((getNumBuckets() & (getNumBuckets() - 1)) == 0 && "# initial buckets must be a power of two!"); @@ -451,8 +463,10 @@ class DenseMapBase : public DebugEpochBase { void moveFrom(DerivedT &Other) { // Insert all the old elements. const KeyT EmptyKey = KeyInfoT::getEmptyKey(); + const KeyT TombstoneKey = KeyInfoT::getTombstoneKey(); for (BucketT &B : Other.buckets()) { - if (!KeyInfoT::isEqual(B.getFirst(), EmptyKey)) { + if (!KeyInfoT::isEqual(B.getFirst(), EmptyKey) && + !KeyInfoT::isEqual(B.getFirst(), TombstoneKey)) { // Insert the key/value into the new table. BucketT *DestBucket; bool FoundVal = LookupBucketFor(B.getFirst(), DestBucket); @@ -474,6 +488,7 @@ class DenseMapBase : public DebugEpochBase { this->destroyAll(); derived().deallocateBuckets(); setNumEntries(0); + setNumTombstones(0); if (!derived().allocateBuckets(other.getNumBuckets())) { // The bucket list is empty. No work to do. return; @@ -483,6 +498,7 @@ class DenseMapBase : public DebugEpochBase { assert(getNumBuckets() == other.getNumBuckets()); setNumEntries(other.getNumEntries()); + setNumTombstones(other.getNumTombstones()); BucketT *Buckets = getBuckets(); const BucketT *OtherBuckets = other.getBuckets(); @@ -493,66 +509,17 @@ class DenseMapBase : public DebugEpochBase { NumBuckets * sizeof(BucketT)); } else { const KeyT EmptyKey = KeyInfoT::getEmptyKey(); + const KeyT TombstoneKey = KeyInfoT::getTombstoneKey(); for (size_t I = 0; I < NumBuckets; ++I) { ::new (&Buckets[I].getFirst()) KeyT(OtherBuckets[I].getFirst()); - if (!KeyInfoT::isEqual(Buckets[I].getFirst(), EmptyKey)) + if (!KeyInfoT::isEqual(Buckets[I].getFirst(), EmptyKey) && + !KeyInfoT::isEqual(Buckets[I].getFirst(), TombstoneKey)) ::new (&Buckets[I].getSecond()) ValueT(OtherBuckets[I].getSecond()); } } } private: - // ValueHandleBase caches pointers into the bucket array, so it needs the - // callback erase below to fix them up as entries shift. It is the only - // intended caller; do not add new ones. - friend class ValueHandleBase; - - /// Erase the entry at \p TheBucket and close the resulting hole via Knuth - /// TAOCP 6.4 Algorithm R. For callers that cache pointers into the bucket - /// array, call \p OnMoved per shifted bucket. - template <typename OnMovedT> - void eraseFromFilledBucket(BucketT *TheBucket, OnMovedT &&OnMoved) { - incrementEpoch(); - TheBucket->getSecond().~ValueT(); - decrementNumEntries(); - - BucketT *BucketsPtr = getBuckets(); - const unsigned NumBuckets = getNumBuckets(); - const unsigned Mask = NumBuckets - 1; - const KeyT EmptyKey = KeyInfoT::getEmptyKey(); - unsigned I = static_cast<unsigned>(TheBucket - BucketsPtr); - unsigned J = I; - while (true) { - J = (J + 1) & Mask; - BucketT &BJ = BucketsPtr[J]; - if (KeyInfoT::isEqual(BJ.getFirst(), EmptyKey)) - break; - auto Ideal = KeyInfoT::getHashValue(BJ.getFirst()); - // If the hole (I) lies on the linear-probe chain from the home bucket - // (Ideal) to J, shift J into the hole and make J the new hole. - if (((I - Ideal) & Mask) < ((J - Ideal) & Mask)) { - BucketT &BI = BucketsPtr[I]; - BI.getFirst() = std::move(BJ.getFirst()); - ::new (&BI.getSecond()) ValueT(std::move(BJ.getSecond())); - BJ.getSecond().~ValueT(); - OnMoved(BI); - I = J; - } - } - BucketsPtr[I].getFirst() = EmptyKey; - } - - /// Erase \p Val and close the resulting hole by potentially shifting other - /// entries into it. For callers that cache pointers into the bucket array, - /// call \p OnMoved per shifted bucket. - template <typename OnMovedT> bool erase(const KeyT &Val, OnMovedT &&OnMoved) { - BucketT *TheBucket = doFind(Val); - if (!TheBucket) - return false; - eraseFromFilledBucket(TheBucket, std::forward<OnMovedT>(OnMoved)); - return true; - } - DerivedT &derived() { return *static_cast<DerivedT *>(this); } const DerivedT &derived() const { return *static_cast<const DerivedT *>(this); @@ -595,6 +562,14 @@ class DenseMapBase : public DebugEpochBase { void decrementNumEntries() { setNumEntries(getNumEntries() - 1); } + unsigned getNumTombstones() const { return derived().getNumTombstones(); } + + void setNumTombstones(unsigned Num) { derived().setNumTombstones(Num); } + + void incrementNumTombstones() { setNumTombstones(getNumTombstones() + 1); } + + void decrementNumTombstones() { setNumTombstones(getNumTombstones() - 1); } + const BucketT *getBuckets() const { return derived().getBuckets(); } BucketT *getBuckets() { return derived().getBuckets(); } @@ -630,21 +605,37 @@ class DenseMapBase : public DebugEpochBase { BucketT *TheBucket) { incrementEpoch(); - // Grow the table if the load factor would exceed 3/4 after insertion. - // Linear probing with gap-closing deletion (Knuth Algorithm R) keeps - // every chain compact and bounded by the table's empty-bucket count, - // so no tombstone-driven resize is needed. + // If the load of the hash table is more than 3/4, or if fewer than 1/8 of + // the buckets are empty (meaning that many are filled with tombstones), + // grow the table. + // + // The later case is tricky. For example, if we had one empty bucket with + // tons of tombstones, failing lookups (e.g. for insertion) would have to + // probe almost the entire table until it found the empty bucket. If the + // table completely filled with tombstones, no lookup would ever succeed, + // causing infinite loops in lookup. unsigned NewNumEntries = getNumEntries() + 1; unsigned NumBuckets = getNumBuckets(); if (LLVM_UNLIKELY(NewNumEntries * 4 >= NumBuckets * 3)) { this->grow(NumBuckets * 2); LookupBucketFor(Lookup, TheBucket); + } else if (LLVM_UNLIKELY(NumBuckets - + (NewNumEntries + getNumTombstones()) <= + NumBuckets / 8)) { + this->grow(NumBuckets); + LookupBucketFor(Lookup, TheBucket); } assert(TheBucket); // Only update the state after we've grown our bucket space appropriately // so that when growing buckets we have self-consistent entry count. incrementNumEntries(); + + // If we are writing over a tombstone, remember this. + const KeyT EmptyKey = KeyInfoT::getEmptyKey(); + if (!KeyInfoT::isEqual(TheBucket->getFirst(), EmptyKey)) + decrementNumTombstones(); + return TheBucket; } @@ -657,6 +648,7 @@ class DenseMapBase : public DebugEpochBase { const KeyT EmptyKey = KeyInfoT::getEmptyKey(); unsigned BucketNo = KeyInfoT::getHashValue(Val) & (NumBuckets - 1); + unsigned ProbeAmt = 1; while (true) { const BucketT *Bucket = BucketsPtr + BucketNo; if (LLVM_LIKELY(KeyInfoT::isEqual(Val, Bucket->getFirst()))) @@ -664,8 +656,10 @@ class DenseMapBase : public DebugEpochBase { if (LLVM_LIKELY(KeyInfoT::isEqual(Bucket->getFirst(), EmptyKey))) return nullptr; - // Hash collision: continue linear probing. - BucketNo = (BucketNo + 1) & (NumBuckets - 1); + // Otherwise, it's a hash collision or a tombstone, continue quadratic + // probing. + BucketNo += ProbeAmt++; + BucketNo &= NumBuckets - 1; } } @@ -676,7 +670,7 @@ class DenseMapBase : public DebugEpochBase { /// Lookup the appropriate bucket for Val, returning it in FoundBucket. If the /// bucket contains the key and a value, this returns true, otherwise it - /// returns a bucket with an empty marker and returns false. + /// returns a bucket with an empty marker or tombstone and returns false. template <typename LookupKeyT> bool LookupBucketFor(const LookupKeyT &Val, BucketT *&FoundBucket) { BucketT *BucketsPtr = getBuckets(); @@ -687,11 +681,16 @@ class DenseMapBase : public DebugEpochBase { return false; } + // FoundTombstone - Keep track of whether we find a tombstone while probing. + BucketT *FoundTombstone = nullptr; const KeyT EmptyKey = KeyInfoT::getEmptyKey(); + const KeyT TombstoneKey = KeyInfoT::getTombstoneKey(); assert(!KeyInfoT::isEqual(Val, EmptyKey) && - "Empty value shouldn't be inserted into map!"); + !KeyInfoT::isEqual(Val, TombstoneKey) && + "Empty/Tombstone value shouldn't be inserted into map!"); unsigned BucketNo = KeyInfoT::getHashValue(Val) & (NumBuckets - 1); + unsigned ProbeAmt = 1; while (true) { BucketT *ThisBucket = BucketsPtr + BucketNo; // Found Val's bucket? If so, return it. @@ -701,14 +700,24 @@ class DenseMapBase : public DebugEpochBase { } // If we found an empty bucket, the key doesn't exist in the set. - // Return it as the insertion point. + // Insert it and return the default value. if (LLVM_LIKELY(KeyInfoT::isEqual(ThisBucket->getFirst(), EmptyKey))) { - FoundBucket = ThisBucket; + // If we've already seen a tombstone while probing, fill it in instead + // of the empty bucket we eventually probed to. + FoundBucket = FoundTombstone ? FoundTombstone : ThisBucket; return false; } - // Hash collision: continue linear probing. - BucketNo = (BucketNo + 1) & (NumBuckets - 1); + // If this is a tombstone, remember it. If Val ends up not in the map, we + // prefer to return it than something that would require more probing. + if (KeyInfoT::isEqual(ThisBucket->getFirst(), TombstoneKey) && + !FoundTombstone) + FoundTombstone = ThisBucket; // Remember the first tombstone found. + + // Otherwise, it's a hash collision or a tombstone, continue quadratic + // probing. + BucketNo += ProbeAmt++; + BucketNo &= (NumBuckets - 1); } } @@ -769,6 +778,7 @@ class DenseMap : public DenseMapBase<DenseMap<KeyT, ValueT, KeyInfoT, BucketT>, BucketT *Buckets = nullptr; unsigned NumEntries = 0; + unsigned NumTombstones = 0; unsigned NumBuckets = 0; explicit DenseMap(unsigned NumBuckets, typename BaseT::ExactBucketCount) { @@ -821,6 +831,7 @@ class DenseMap : public DenseMapBase<DenseMap<KeyT, ValueT, KeyInfoT, BucketT>, void swapImpl(DenseMap &RHS) { std::swap(Buckets, RHS.Buckets); std::swap(NumEntries, RHS.NumEntries); + std::swap(NumTombstones, RHS.NumTombstones); std::swap(NumBuckets, RHS.NumBuckets); } @@ -828,6 +839,10 @@ class DenseMap : public DenseMapBase<DenseMap<KeyT, ValueT, KeyInfoT, BucketT>, void setNumEntries(unsigned Num) { NumEntries = Num; } + unsigned getNumTombstones() const { return NumTombstones; } + + void setNumTombstones(unsigned Num) { NumTombstones = Num; } + BucketT *getBuckets() const { return Buckets; } unsigned getNumBuckets() const { return NumBuckets; } @@ -896,6 +911,7 @@ class SmallDenseMap unsigned Small : 1; unsigned NumEntries : 31; + unsigned NumTombstones; struct LargeRep { BucketT *Buckets; @@ -962,8 +978,10 @@ class SmallDenseMap unsigned TmpNumEntries = RHS.NumEntries; RHS.NumEntries = NumEntries; NumEntries = TmpNumEntries; + std::swap(NumTombstones, RHS.NumTombstones); const KeyT EmptyKey = KeyInfoT::getEmptyKey(); + const KeyT TombstoneKey = KeyInfoT::getTombstoneKey(); if (Small && RHS.Small) { // If we're swapping inline bucket arrays, we have to cope with some of // the tricky bits of DenseMap's storage system: the buckets are not @@ -972,8 +990,10 @@ class SmallDenseMap for (unsigned i = 0, e = InlineBuckets; i != e; ++i) { BucketT *LHSB = &getInlineBuckets()[i], *RHSB = &RHS.getInlineBuckets()[i]; - bool hasLHSValue = !KeyInfoT::isEqual(LHSB->getFirst(), EmptyKey); - bool hasRHSValue = !KeyInfoT::isEqual(RHSB->getFirst(), EmptyKey); + bool hasLHSValue = (!KeyInfoT::isEqual(LHSB->getFirst(), EmptyKey) && + !KeyInfoT::isEqual(LHSB->getFirst(), TombstoneKey)); + bool hasRHSValue = (!KeyInfoT::isEqual(RHSB->getFirst(), EmptyKey) && + !KeyInfoT::isEqual(RHSB->getFirst(), TombstoneKey)); if (hasLHSValue && hasRHSValue) { // Swap together if we can... std::swap(*LHSB, *RHSB); @@ -1012,7 +1032,8 @@ class SmallDenseMap *OldB = &SmallSide.getInlineBuckets()[i]; ::new (&NewB->getFirst()) KeyT(std::move(OldB->getFirst())); OldB->getFirst().~KeyT(); - if (!KeyInfoT::isEqual(NewB->getFirst(), EmptyKey)) { + if (!KeyInfoT::isEqual(NewB->getFirst(), EmptyKey) && + !KeyInfoT::isEqual(NewB->getFirst(), TombstoneKey)) { ::new (&NewB->getSecond()) ValueT(std::move(OldB->getSecond())); OldB->getSecond().~ValueT(); } @@ -1032,6 +1053,10 @@ class SmallDenseMap NumEntries = Num; } + unsigned getNumTombstones() const { return NumTombstones; } + + void setNumTombstones(unsigned Num) { NumTombstones = Num; } + const BucketT *getInlineBuckets() const { assert(Small); // Note that this cast does not violate aliasing rules as we assert that @@ -1119,6 +1144,7 @@ class SmallDenseMap Small = false; NumEntries = Other.NumEntries; + NumTombstones = Other.NumTombstones; *getLargeRep() = std::move(*Other.getLargeRep()); Other.getLargeRep()->NumBuckets = 0; return true; @@ -1247,8 +1273,10 @@ class DenseMapIterator : DebugEpochBase::HandleBase { void AdvancePastEmptyBuckets() { assert(Ptr <= End); const KeyT Empty = KeyInfoT::getEmptyKey(); + const KeyT Tombstone = KeyInfoT::getTombstoneKey(); - while (Ptr != End && KeyInfoT::isEqual(Ptr->getFirst(), Empty)) + while (Ptr != End && (KeyInfoT::isEqual(Ptr->getFirst(), Empty) || + KeyInfoT::isEqual(Ptr->getFirst(), Tombstone))) ++Ptr; } diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp index f5a501897f0bb..074d42d728c95 100644 --- a/llvm/lib/IR/Value.cpp +++ b/llvm/lib/IR/Value.cpp @@ -1222,10 +1222,7 @@ void ValueHandleBase::RemoveFromUseList() { LLVMContextImpl *pImpl = getValPtr()->getContext().pImpl; DenseMap<Value*, ValueHandleBase*> &Handles = pImpl->ValueHandles; if (Handles.isPointerIntoBucketsArray(PrevPtr)) { - // TODO: Remove the only user of DenseMap's callback erase. - Handles.erase(getValPtr(), [](auto &Bucket) { - Bucket.second->setPrevPtr(&Bucket.second); - }); + Handles.erase(getValPtr()); getValPtr()->HasValueHandle = false; } } diff --git a/llvm/unittests/ADT/BitVectorTest.cpp b/llvm/unittests/ADT/BitVectorTest.cpp index adf788b8f9661..d3200b7722ee3 100644 --- a/llvm/unittests/ADT/BitVectorTest.cpp +++ b/llvm/unittests/ADT/BitVectorTest.cpp @@ -1434,7 +1434,8 @@ TYPED_TEST(BitVectorTest, DenseSet) { #if LLVM_ENABLE_ABI_BREAKING_CHECKS TypeParam D; - EXPECT_DEATH(Set.insert(D), "Empty value shouldn't be inserted into map!"); + EXPECT_DEATH(Set.insert(D), + "Empty/Tombstone value shouldn't be inserted into map!"); #endif EXPECT_EQ(3U, Set.size()); _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
