https://github.com/kuhar updated https://github.com/llvm/llvm-project/pull/188242
>From fe6015661a62244e4f26e2e7fdd41c9763479bd0 Mon Sep 17 00:00:00 2001 From: Jakub Kuderski <[email protected]> Date: Tue, 24 Mar 2026 09:15:12 -0400 Subject: [PATCH 1/3] Reland "[llvm][ADT] Refactor PointerUnion to use PunnedPointer. NFC." This reverts commit 040b7e0a1deb and re-lands the PointerUnion refactoring from #187950, with a fix for the 32-bit crash. The bug was in doCast: it masked with PointerLikeTypeTraits<To>::NumLowBitsAvailable to strip tag bits, but the old PointerIntPair-based code masked with minLowBitsAvailable() (the minimum across all union members). When a member type's PLTT over-claims spare low bits, the new mask was too aggressive and cleared bits belonging to a nested PointerUnion's tag. Concretely, on 32-bit systems, Redeclarable::DeclLink nests LazyGenerationalUpdatePtr (LGUP) whose PLTT claims 2 spare bits (Decl* has alignas(8) = 3 bits, minus 1). But LGUP's inner PointerUnion<Decl*, LazyData*> only has 1 spare bit on 32-bit (alignof(LazyData) = 4 gives LazyData* only 2 low bits, tagShift = 1). Extracting LGUP from the outer PointerUnion cleared bit 1 (the inner PU's type tag), corrupting the discriminator and breaking redeclaration chains. Added new unit tests that cover this scenario. Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]> --- llvm/include/llvm/ADT/PointerUnion.h | 259 +++++++++++++----------- llvm/unittests/ADT/PointerUnionTest.cpp | 90 ++++++++ 2 files changed, 230 insertions(+), 119 deletions(-) diff --git a/llvm/include/llvm/ADT/PointerUnion.h b/llvm/include/llvm/ADT/PointerUnion.h index d9087dd1c516e..5501272372492 100644 --- a/llvm/include/llvm/ADT/PointerUnion.h +++ b/llvm/include/llvm/ADT/PointerUnion.h @@ -28,63 +28,56 @@ namespace llvm { namespace pointer_union_detail { - /// Determine the number of bits required to store integers with values < n. - /// This is ceil(log2(n)). - constexpr int bitsRequired(unsigned n) { - return n == 0 ? 0 : llvm::bit_width_constexpr(n - 1); - } - template <typename... Ts> constexpr int lowBitsAvailable() { - return std::min<int>({PointerLikeTypeTraits<Ts>::NumLowBitsAvailable...}); - } +/// Determine the number of bits required to store values in [0, NumValues). +/// This is ceil(log2(NumValues)). +constexpr int bitsRequired(unsigned NumValues) { + return NumValues == 0 ? 0 : llvm::bit_width_constexpr(NumValues - 1); +} - /// Provide PointerLikeTypeTraits for void* that is used by PointerUnion - /// for the template arguments. - template <typename ...PTs> class PointerUnionUIntTraits { - public: - static inline void *getAsVoidPointer(void *P) { return P; } - static inline void *getFromVoidPointer(void *P) { return P; } - static constexpr int NumLowBitsAvailable = lowBitsAvailable<PTs...>(); - }; - - template <typename Derived, typename ValTy, int I, typename ...Types> - class PointerUnionMembers; - - template <typename Derived, typename ValTy, int I> - class PointerUnionMembers<Derived, ValTy, I> { - protected: - ValTy Val; - PointerUnionMembers() = default; - PointerUnionMembers(ValTy Val) : Val(Val) {} - - friend struct PointerLikeTypeTraits<Derived>; - }; - - template <typename Derived, typename ValTy, int I, typename Type, - typename ...Types> - class PointerUnionMembers<Derived, ValTy, I, Type, Types...> - : public PointerUnionMembers<Derived, ValTy, I + 1, Types...> { - using Base = PointerUnionMembers<Derived, ValTy, I + 1, Types...>; - public: - using Base::Base; - PointerUnionMembers() = default; - PointerUnionMembers(Type V) - : Base(ValTy(const_cast<void *>( - PointerLikeTypeTraits<Type>::getAsVoidPointer(V)), - I)) {} - - using Base::operator=; - Derived &operator=(Type V) { - this->Val = ValTy( - const_cast<void *>(PointerLikeTypeTraits<Type>::getAsVoidPointer(V)), - I); - return static_cast<Derived &>(*this); - }; - }; +template <typename... Ts> constexpr int lowBitsAvailable() { + return std::min( + {static_cast<int>(PointerLikeTypeTraits<Ts>::NumLowBitsAvailable)...}); } +/// CRTP base that generates non-template constructors and assignment operators +/// for each type in the union. Non-template constructors allow implicit +/// conversions (derived-to-base, non-const-to-const). +template <typename Derived, int Idx, typename... Types> +class PointerUnionMembers; + +template <typename Derived, int Idx> class PointerUnionMembers<Derived, Idx> { +protected: + detail::PunnedPointer<void *> Val; + PointerUnionMembers() : Val(uintptr_t(0)) {} + + template <typename To, typename From, typename Enable> + friend struct ::llvm::CastInfo; + template <typename> friend struct ::llvm::PointerLikeTypeTraits; +}; + +template <typename Derived, int Idx, typename Type, typename... Types> +class PointerUnionMembers<Derived, Idx, Type, Types...> + : public PointerUnionMembers<Derived, Idx + 1, Types...> { + using Base = PointerUnionMembers<Derived, Idx + 1, Types...>; + +public: + using Base::Base; + PointerUnionMembers() = default; + + PointerUnionMembers(Type V) { this->Val = Derived::encode(V); } + + using Base::operator=; + Derived &operator=(Type V) { + this->Val = Derived::encode(V); + return static_cast<Derived &>(*this); + } +}; + +} // end namespace pointer_union_detail + /// A discriminated union of two or more pointer types, with the discriminator -/// in the low bit of the pointer. +/// in the low bits of the pointer. /// /// This implementation is extremely efficient in space due to leveraging the /// low bits of the pointer, while exposing a natural and type-safe API. @@ -102,38 +95,66 @@ namespace pointer_union_detail { /// PointerUnion<int*, int*> Q; // compile time failure. template <typename... PTs> class PointerUnion - : public pointer_union_detail::PointerUnionMembers< - PointerUnion<PTs...>, - PointerIntPair< - void *, pointer_union_detail::bitsRequired(sizeof...(PTs)), int, - pointer_union_detail::PointerUnionUIntTraits<PTs...>>, - 0, PTs...> { + : public pointer_union_detail::PointerUnionMembers<PointerUnion<PTs...>, 0, + PTs...> { static_assert(TypesAreDistinct<PTs...>::value, "PointerUnion alternative types cannot be repeated"); - // The first type is special because we want to directly cast a pointer to a - // default-initialized union to a pointer to the first type. But we don't - // want PointerUnion to be a 'template <typename First, typename ...Rest>' - // because it's much more convenient to have a name for the whole pack. So - // split off the first type here. - using First = TypeAtIndex<0, PTs...>; + using Base = typename PointerUnion::PointerUnionMembers; + using First = TypeAtIndex<0, PTs...>; - // Give the CastInfo specialization below access to protected members. - // - // This makes all of CastInfo a friend, which is more than strictly - // necessary. It's a workaround for C++'s inability to friend a - // partial template specialization. + template <typename, int, typename...> + friend class pointer_union_detail::PointerUnionMembers; template <typename To, typename From, typename Enable> friend struct CastInfo; + template <typename> friend struct PointerLikeTypeTraits; + + // These are constexpr functions rather than static constexpr data members + // so that alignof() on potentially incomplete types is not evaluated at + // class-definition time. + static constexpr int minLowBitsAvailable() { + return pointer_union_detail::lowBitsAvailable<PTs...>(); + } + + static constexpr int tagBits() { + return pointer_union_detail::bitsRequired(sizeof...(PTs)); + } + + /// The tag is shifted to the high end of the available low bits so that + /// the lowest bits remain free for nesting in PointerIntPair or SmallPtrSet. + static constexpr int tagShift() { return minLowBitsAvailable() - tagBits(); } + + static constexpr uintptr_t tagMask() { + return (uintptr_t(1) << tagBits()) - 1; + } + + template <typename T> static uintptr_t encode(T V) { + constexpr int Shift = tagShift(); + constexpr auto Tag = uintptr_t(FirstIndexOfType<T, PTs...>::value); + uintptr_t PtrInt = reinterpret_cast<uintptr_t>( + PointerLikeTypeTraits<T>::getAsVoidPointer(V)); + assert((PtrInt & (tagMask() << Shift)) == 0 && + "Pointer low bits collide with tag"); + return PtrInt | (Tag << Shift); + } public: PointerUnion() = default; - PointerUnion(std::nullptr_t) : PointerUnion() {} using Base::Base; + using Base::operator=; + + /// Assignment from nullptr clears the union, resetting to the first type. + const PointerUnion &operator=(std::nullptr_t) { + this->Val = uintptr_t(0); + return *this; + } /// Test if the pointer held in the union is null, regardless of /// which type it is. - bool isNull() const { return !this->Val.getPointer(); } + bool isNull() const { + return (static_cast<uintptr_t>(this->Val.asInt()) >> + minLowBitsAvailable()) == 0; + } explicit operator bool() const { return !isNull(); } @@ -141,18 +162,14 @@ class PointerUnion // isa<T>, cast<T> and the llvm::dyn_cast<T> /// Test if the Union currently holds the type matching T. - template <typename T> - [[deprecated("Use isa instead")]] - inline bool is() const { + template <typename T> [[deprecated("Use isa instead")]] bool is() const { return isa<T>(*this); } /// Returns the value of the specified pointer type. /// /// If the specified pointer type is incorrect, assert. - template <typename T> - [[deprecated("Use cast instead")]] - inline T get() const { + template <typename T> [[deprecated("Use cast instead")]] T get() const { assert(isa<T>(*this) && "Invalid accessor called"); return cast<T>(*this); } @@ -172,61 +189,62 @@ class PointerUnion /// If the union is set to the first pointer type get an address pointing to /// it. First *getAddrOfPtr1() { + static_assert(FirstIndexOfType<First, PTs...>::value == 0, + "First type must have tag value 0 for getAddrOfPtr1"); assert(isa<First>(*this) && "Val is not the first pointer"); + // tag == 0 for first type, so asInt() is the raw pointer value. assert( PointerLikeTypeTraits<First>::getAsVoidPointer(cast<First>(*this)) == - this->Val.getPointer() && + reinterpret_cast<void *>(this->Val.asInt()) && "Can't get the address because PointerLikeTypeTraits changes the ptr"); return const_cast<First *>( - reinterpret_cast<const First *>(this->Val.getAddrOfPointer())); + reinterpret_cast<const First *>(this->Val.getPointerAddress())); } - /// Assignment from nullptr which just clears the union. - const PointerUnion &operator=(std::nullptr_t) { - this->Val.initWithPointer(nullptr); - return *this; + void *getOpaqueValue() const { + return reinterpret_cast<void *>(this->Val.asInt()); } - /// Assignment from elements of the union. - using Base::operator=; - - void *getOpaqueValue() const { return this->Val.getOpaqueValue(); } static inline PointerUnion getFromOpaqueValue(void *VP) { PointerUnion V; - V.Val = decltype(V.Val)::getFromOpaqueValue(VP); + V.Val = reinterpret_cast<intptr_t>(VP); return V; } -}; -template <typename ...PTs> -bool operator==(PointerUnion<PTs...> lhs, PointerUnion<PTs...> rhs) { - return lhs.getOpaqueValue() == rhs.getOpaqueValue(); -} + friend bool operator==(PointerUnion lhs, PointerUnion rhs) { + return lhs.getOpaqueValue() == rhs.getOpaqueValue(); + } -template <typename ...PTs> -bool operator!=(PointerUnion<PTs...> lhs, PointerUnion<PTs...> rhs) { - return lhs.getOpaqueValue() != rhs.getOpaqueValue(); -} + friend bool operator!=(PointerUnion lhs, PointerUnion rhs) { + return lhs.getOpaqueValue() != rhs.getOpaqueValue(); + } -template <typename ...PTs> -bool operator<(PointerUnion<PTs...> lhs, PointerUnion<PTs...> rhs) { - return lhs.getOpaqueValue() < rhs.getOpaqueValue(); -} + friend bool operator<(PointerUnion lhs, PointerUnion rhs) { + return lhs.getOpaqueValue() < rhs.getOpaqueValue(); + } +}; -// Specialization of CastInfo for PointerUnion +// Specialization of CastInfo for PointerUnion. template <typename To, typename... PTs> struct CastInfo<To, PointerUnion<PTs...>> : public DefaultDoCastIfPossible<To, PointerUnion<PTs...>, CastInfo<To, PointerUnion<PTs...>>> { using From = PointerUnion<PTs...>; - static inline bool isPossible(From &f) { - return f.Val.getInt() == FirstIndexOfType<To, PTs...>::value; + static inline bool isPossible(From &F) { + constexpr int Shift = From::tagShift(); + constexpr auto Tag = uintptr_t(FirstIndexOfType<To, PTs...>::value); + auto V = reinterpret_cast<uintptr_t>(F.getOpaqueValue()); + return ((V >> Shift) & From::tagMask()) == Tag; } - static To doCast(From &f) { - assert(isPossible(f) && "cast to an incompatible type!"); - return PointerLikeTypeTraits<To>::getFromVoidPointer(f.Val.getPointer()); + static To doCast(From &F) { + assert(isPossible(F) && "cast to an incompatible type!"); + constexpr uintptr_t PtrMask = + ~((uintptr_t(1) << From::minLowBitsAvailable()) - 1); + void *Ptr = reinterpret_cast<void *>( + reinterpret_cast<uintptr_t>(F.getOpaqueValue()) & PtrMask); + return PointerLikeTypeTraits<To>::getFromVoidPointer(Ptr); } static inline To castFailed() { return To(); } @@ -238,26 +256,29 @@ struct CastInfo<To, const PointerUnion<PTs...>> CastInfo<To, PointerUnion<PTs...>>> { }; -// Teach SmallPtrSet that PointerUnion is "basically a pointer", that has -// # low bits available = min(PT1bits,PT2bits)-1. -template <typename ...PTs> -struct PointerLikeTypeTraits<PointerUnion<PTs...>> { - static inline void *getAsVoidPointer(const PointerUnion<PTs...> &P) { +// Teach SmallPtrSet that PointerUnion is "basically a pointer". +// Spare low bits below the tag are available for nesting. +// This specialization is only instantiated when used (lazy), so +// PointerLikeTypeTraits<PTs> / alignof() are not evaluated for +// incomplete types. +template <typename... PTs> struct PointerLikeTypeTraits<PointerUnion<PTs...>> { + using Union = PointerUnion<PTs...>; + + static inline void *getAsVoidPointer(const Union &P) { return P.getOpaqueValue(); } - static inline PointerUnion<PTs...> getFromVoidPointer(void *P) { - return PointerUnion<PTs...>::getFromOpaqueValue(P); + static inline Union getFromVoidPointer(void *P) { + return Union::getFromOpaqueValue(P); } // The number of bits available are the min of the pointer types minus the // bits needed for the discriminator. - static constexpr int NumLowBitsAvailable = PointerLikeTypeTraits<decltype( - PointerUnion<PTs...>::Val)>::NumLowBitsAvailable; + static constexpr int NumLowBitsAvailable = Union::tagShift(); }; // Teach DenseMap how to use PointerUnions as keys. -template <typename ...PTs> struct DenseMapInfo<PointerUnion<PTs...>> { +template <typename... PTs> struct DenseMapInfo<PointerUnion<PTs...>> { using Union = PointerUnion<PTs...>; using FirstInfo = DenseMapInfo<TypeAtIndex<0, PTs...>>; @@ -268,8 +289,8 @@ template <typename ...PTs> struct DenseMapInfo<PointerUnion<PTs...>> { } static unsigned getHashValue(const Union &UnionVal) { - intptr_t key = (intptr_t)UnionVal.getOpaqueValue(); - return DenseMapInfo<intptr_t>::getHashValue(key); + auto Key = reinterpret_cast<uintptr_t>(UnionVal.getOpaqueValue()); + return DenseMapInfo<uintptr_t>::getHashValue(Key); } static bool isEqual(const Union &LHS, const Union &RHS) { diff --git a/llvm/unittests/ADT/PointerUnionTest.cpp b/llvm/unittests/ADT/PointerUnionTest.cpp index d8ac3aed76da2..fa4c86e51bccc 100644 --- a/llvm/unittests/ADT/PointerUnionTest.cpp +++ b/llvm/unittests/ADT/PointerUnionTest.cpp @@ -289,4 +289,94 @@ TEST_F(PointerUnionTest, NewCastInfra) { "type mismatch for cast with PointerUnion"); } +// Regression test: doCast must mask with minLowBitsAvailable(), not +// To::NumLowBitsAvailable, to avoid clearing inner PointerUnion tag bits. +// This reproduces the 32-bit crash from PR #187950 on any platform by +// using types whose alignment mimics the 32-bit DeclLink layout: +// OuterPU<InnerPU, OverClaimWrapper> +// where OverClaimWrapper's PLTT claims more low bits than its inner +// PointerUnion actually has spare. + +struct alignas(8) HighAlign { int x; }; +struct alignas(4) LowAlign { int x; }; + +// Wrapper around a PointerUnion that over-claims NumLowBitsAvailable, +// mimicking LazyGenerationalUpdatePtr's PLTT on 32-bit. +struct OverClaimWrapper { + PointerUnion<HighAlign *, LowAlign *> Value; + + OverClaimWrapper() = default; + explicit OverClaimWrapper(decltype(Value) V) : Value(V) {} + + void *getOpaqueValue() { return Value.getOpaqueValue(); } + static OverClaimWrapper getFromOpaqueValue(void *P) { + return OverClaimWrapper(decltype(Value)::getFromOpaqueValue(P)); + } +}; + +} // end anonymous namespace + +namespace llvm { +template <> struct PointerLikeTypeTraits<OverClaimWrapper> { + static void *getAsVoidPointer(OverClaimWrapper W) { + return W.getOpaqueValue(); + } + static OverClaimWrapper getFromVoidPointer(void *P) { + return OverClaimWrapper::getFromOpaqueValue(P); + } + // Inner PU<HighAlign*(3 bits), LowAlign*(2 bits)> has tagShift=1, so only + // 1 spare bit. Claiming 2 bits mimics the LGUP over-claim on 32-bit. + static constexpr int NumLowBitsAvailable = 2; +}; +} // namespace llvm + +namespace { + +TEST(PointerUnionNestedTest, NestedTagPreservation) { + // Inner PU: PointerUnion<HighAlign*, LowAlign*> + // minLowBits = min(3, 2) = 2, tagBits = 1, tagShift = 1 + // Tag for LowAlign* (index 1) is in bit 1. + // NumLowBitsAvailable = 1 + + // Outer PU: PointerUnion<InnerPU, OverClaimWrapper> + // InnerPU NumLowBitsAvailable = 1 + // OverClaimWrapper NumLowBitsAvailable = 2 (over-claimed) + // minLowBits = 1, tagBits = 1, tagShift = 0 + + using InnerPU = PointerUnion<HighAlign *, LowAlign *>; + using OuterPU = PointerUnion<InnerPU, OverClaimWrapper>; + + LowAlign low; + + // Store LowAlign* in the inner PU (tag = 1, in bit 1). + InnerPU inner(&low); + ASSERT_TRUE(isa<LowAlign *>(inner)); + ASSERT_EQ(cast<LowAlign *>(inner), &low); + + // Wrap it and store in the outer PU. + OverClaimWrapper wrapper(inner); + OuterPU outer(wrapper); + ASSERT_TRUE(isa<OverClaimWrapper>(outer)); + + // Extract the wrapper back. Before the fix, doCast would clear bit 1 + // (the inner PU's tag), corrupting the type discriminator. + OverClaimWrapper extracted = cast<OverClaimWrapper>(outer); + InnerPU extractedInner = extracted.Value; + + EXPECT_TRUE(isa<LowAlign *>(extractedInner)) + << "Inner PointerUnion tag corrupted during doCast: expected LowAlign*, " + "got HighAlign*. doCast must not clear bits beyond " + "minLowBitsAvailable()."; + EXPECT_EQ(cast<LowAlign *>(extractedInner), &low); + + // Also verify the HighAlign* path (tag = 0) works. + HighAlign high; + InnerPU inner2(&high); + OverClaimWrapper wrapper2(inner2); + OuterPU outer2(wrapper2); + OverClaimWrapper extracted2 = cast<OverClaimWrapper>(outer2); + EXPECT_TRUE(isa<HighAlign *>(extracted2.Value)); + EXPECT_EQ(cast<HighAlign *>(extracted2.Value), &high); +} + } // end anonymous namespace >From c9f1e122937df3da6d8cd33494770d8a2614aff3 Mon Sep 17 00:00:00 2001 From: Jakub Kuderski <[email protected]> Date: Tue, 24 Mar 2026 09:27:29 -0400 Subject: [PATCH 2/3] Format --- llvm/unittests/ADT/PointerUnionTest.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/llvm/unittests/ADT/PointerUnionTest.cpp b/llvm/unittests/ADT/PointerUnionTest.cpp index fa4c86e51bccc..74817468612c1 100644 --- a/llvm/unittests/ADT/PointerUnionTest.cpp +++ b/llvm/unittests/ADT/PointerUnionTest.cpp @@ -297,8 +297,12 @@ TEST_F(PointerUnionTest, NewCastInfra) { // where OverClaimWrapper's PLTT claims more low bits than its inner // PointerUnion actually has spare. -struct alignas(8) HighAlign { int x; }; -struct alignas(4) LowAlign { int x; }; +struct alignas(8) HighAlign { + int x; +}; +struct alignas(4) LowAlign { + int x; +}; // Wrapper around a PointerUnion that over-claims NumLowBitsAvailable, // mimicking LazyGenerationalUpdatePtr's PLTT on 32-bit. >From c8c3bbcb91f3add129a5e2e51b9b039a1bc72ccf Mon Sep 17 00:00:00 2001 From: Jakub Kuderski <[email protected]> Date: Tue, 24 Mar 2026 11:39:26 -0400 Subject: [PATCH 3/3] Add TODO for the overclaimed bits --- clang/include/clang/AST/ExternalASTSource.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clang/include/clang/AST/ExternalASTSource.h b/clang/include/clang/AST/ExternalASTSource.h index 8fc490b1d8471..9a2c1433c6362 100644 --- a/clang/include/clang/AST/ExternalASTSource.h +++ b/clang/include/clang/AST/ExternalASTSource.h @@ -529,6 +529,8 @@ struct PointerLikeTypeTraits< static void *getAsVoidPointer(Ptr P) { return P.getOpaqueValue(); } static Ptr getFromVoidPointer(void *P) { return Ptr::getFromOpaqueValue(P); } + // TODO(#188269): Overclaims on 32-bit. The correct value is: + // PointerLikeTypeTraits<typename Ptr::ValueType>::NumLowBitsAvailable static constexpr int NumLowBitsAvailable = PointerLikeTypeTraits<T>::NumLowBitsAvailable - 1; }; _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
