Author: Duncan P. N. Exon Smith Date: 2021-01-15T14:27:48-08:00 New Revision: ceaf0110ff5e0c2de1f03d65d13703d34d0d5737
URL: https://github.com/llvm/llvm-project/commit/ceaf0110ff5e0c2de1f03d65d13703d34d0d5737 DIFF: https://github.com/llvm/llvm-project/commit/ceaf0110ff5e0c2de1f03d65d13703d34d0d5737.diff LOG: Revert "Revert "ADT: Fix reference invalidation in SmallVector..."" This reverts commit 33be50daa9ce1074c3b423a4ab27c70c0722113a, effectively reapplying: - 260a856c2abcef49c7cb3bdcd999701db3e2af38 - 3043e5a5c33c4c871f4a1dfd621a8839f9a1f0b3 - 49142991a685bd427d7e877c29c77371dfb7634c ... with a fix to skip a call to `SmallVector::isReferenceToStorage()` when we know the parameter had been taken by value for small, POD-like `T`. See https://reviews.llvm.org/D93779 for the discussion on the revert. At a high-level, these commits fix reference invalidation in SmallVector's push_back, append, insert (one or N), and resize operations. For more details, please see the original commit messages. This commit fixes a bug that crept into `SmallVectorTemplateCommon::reserveForAndGetAddress()` during the review process after performance analysis was done. That function is now called `reserveForParamAndGetAddress()`, clarifying that it only works for parameter values. It uses that knowledge to bypass `SmallVector::isReferenceToStorage()` when `TakesParamByValue`. This is `constexpr` and avoids adding overhead for "small enough", trivially copyable `T`. Performance could potentially be tuned further by increasing the threshold for `TakesParamByValue`, which is currently defined as: ``` bool TakesParamByValue = sizeof(T) <= 2 * sizeof(void *); ``` in the POD-like version of SmallVectorTemplateBase (else, `false`). Differential Revision: https://reviews.llvm.org/D94800 Added: Modified: llvm/include/llvm/ADT/SmallVector.h llvm/unittests/ADT/SmallVectorTest.cpp Removed: ################################################################################ diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h index 78d0848b1fcc..2e47846ee7cf 100644 --- a/llvm/include/llvm/ADT/SmallVector.h +++ b/llvm/include/llvm/ADT/SmallVector.h @@ -220,6 +220,27 @@ class SmallVectorTemplateCommon } void assertSafeToEmplace() {} + /// Reserve enough space to add one element, and return the updated element + /// pointer in case it was a reference to the storage. + template <class U> + static const T *reserveForParamAndGetAddressImpl(U *This, const T &Elt, + size_t N) { + size_t NewSize = This->size() + N; + if (LLVM_LIKELY(NewSize <= This->capacity())) + return &Elt; + + bool ReferencesStorage = false; + int64_t Index = -1; + if (!U::TakesParamByValue) { + if (LLVM_UNLIKELY(This->isReferenceToStorage(&Elt))) { + ReferencesStorage = true; + Index = &Elt - This->begin(); + } + } + This->grow(NewSize); + return ReferencesStorage ? This->begin() + Index : &Elt; + } + public: using size_type = size_t; using diff erence_type = ptr diff _t; @@ -303,7 +324,12 @@ template <typename T, bool = (is_trivially_copy_constructible<T>::value) && (is_trivially_move_constructible<T>::value) && std::is_trivially_destructible<T>::value> class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> { + friend class SmallVectorTemplateCommon<T>; + protected: + static constexpr bool TakesParamByValue = false; + using ValueParamT = const T &; + SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {} static void destroy_range(T *S, T *E) { @@ -333,20 +359,32 @@ class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> { /// element, or MinSize more elements if specified. void grow(size_t MinSize = 0); + /// Reserve enough space to add one element, and return the updated element + /// pointer in case it was a reference to the storage. + const T *reserveForParamAndGetAddress(const T &Elt, size_t N = 1) { + return this->reserveForParamAndGetAddressImpl(this, Elt, N); + } + + /// Reserve enough space to add one element, and return the updated element + /// pointer in case it was a reference to the storage. + T *reserveForParamAndGetAddress(T &Elt, size_t N = 1) { + return const_cast<T *>( + this->reserveForParamAndGetAddressImpl(this, Elt, N)); + } + + static T &&forward_value_param(T &&V) { return std::move(V); } + static const T &forward_value_param(const T &V) { return V; } + public: void push_back(const T &Elt) { - this->assertSafeToAdd(&Elt); - if (LLVM_UNLIKELY(this->size() >= this->capacity())) - this->grow(); - ::new ((void*) this->end()) T(Elt); + const T *EltPtr = reserveForParamAndGetAddress(Elt); + ::new ((void *)this->end()) T(*EltPtr); this->set_size(this->size() + 1); } void push_back(T &&Elt) { - this->assertSafeToAdd(&Elt); - if (LLVM_UNLIKELY(this->size() >= this->capacity())) - this->grow(); - ::new ((void*) this->end()) T(::std::move(Elt)); + T *EltPtr = reserveForParamAndGetAddress(Elt); + ::new ((void *)this->end()) T(::std::move(*EltPtr)); this->set_size(this->size() + 1); } @@ -396,7 +434,18 @@ void SmallVectorTemplateBase<T, TriviallyCopyable>::grow(size_t MinSize) { /// skipping destruction. template <typename T> class SmallVectorTemplateBase<T, true> : public SmallVectorTemplateCommon<T> { + friend class SmallVectorTemplateCommon<T>; + protected: + /// True if it's cheap enough to take parameters by value. Doing so avoids + /// overhead related to mitigations for reference invalidation. + static constexpr bool TakesParamByValue = sizeof(T) <= 2 * sizeof(void *); + + /// Either const T& or T, depending on whether it's cheap enough to take + /// parameters by value. + using ValueParamT = + typename std::conditional<TakesParamByValue, T, const T &>::type; + SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {} // No need to do a destroy loop for POD's. @@ -437,12 +486,26 @@ class SmallVectorTemplateBase<T, true> : public SmallVectorTemplateCommon<T> { /// least one more element or MinSize if specified. void grow(size_t MinSize = 0) { this->grow_pod(MinSize, sizeof(T)); } + /// Reserve enough space to add one element, and return the updated element + /// pointer in case it was a reference to the storage. + const T *reserveForParamAndGetAddress(const T &Elt, size_t N = 1) { + return this->reserveForParamAndGetAddressImpl(this, Elt, N); + } + + /// Reserve enough space to add one element, and return the updated element + /// pointer in case it was a reference to the storage. + T *reserveForParamAndGetAddress(T &Elt, size_t N = 1) { + return const_cast<T *>( + this->reserveForParamAndGetAddressImpl(this, Elt, N)); + } + + /// Copy \p V or return a reference, depending on \a ValueParamT. + static ValueParamT forward_value_param(ValueParamT V) { return V; } + public: - void push_back(const T &Elt) { - this->assertSafeToAdd(&Elt); - if (LLVM_UNLIKELY(this->size() >= this->capacity())) - this->grow(); - memcpy(reinterpret_cast<void *>(this->end()), &Elt, sizeof(T)); + void push_back(ValueParamT Elt) { + const T *EltPtr = reserveForParamAndGetAddress(Elt); + memcpy(reinterpret_cast<void *>(this->end()), EltPtr, sizeof(T)); this->set_size(this->size() + 1); } @@ -462,6 +525,9 @@ class SmallVectorImpl : public SmallVectorTemplateBase<T> { using size_type = typename SuperClass::size_type; protected: + using SmallVectorTemplateBase<T>::TakesParamByValue; + using ValueParamT = typename SuperClass::ValueParamT; + // Default ctor - Initialize to empty. explicit SmallVectorImpl(unsigned N) : SmallVectorTemplateBase<T>(N) {} @@ -502,7 +568,7 @@ class SmallVectorImpl : public SmallVectorTemplateBase<T> { /// Like resize, but \ref T is POD, the new values won't be initialized. void resize_for_overwrite(size_type N) { resizeImpl<true>(N); } - void resize(size_type N, const T &NV) { + void resize(size_type N, ValueParamT NV) { if (N == this->size()) return; @@ -511,11 +577,8 @@ class SmallVectorImpl : public SmallVectorTemplateBase<T> { return; } - this->assertSafeToReferenceAfterResize(&NV, N); - if (this->capacity() < N) - this->grow(N); - std::uninitialized_fill(this->end(), this->begin() + N, NV); - this->set_size(N); + // N > this->size(). Defer to append. + this->append(N - this->size(), NV); } void reserve(size_type N) { @@ -551,12 +614,9 @@ class SmallVectorImpl : public SmallVectorTemplateBase<T> { } /// Append \p NumInputs copies of \p Elt to the end. - void append(size_type NumInputs, const T &Elt) { - this->assertSafeToAdd(&Elt, NumInputs); - if (NumInputs > this->capacity() - this->size()) - this->grow(this->size()+NumInputs); - - std::uninitialized_fill_n(this->end(), NumInputs, Elt); + void append(size_type NumInputs, ValueParamT Elt) { + const T *EltPtr = this->reserveForParamAndGetAddress(Elt, NumInputs); + std::uninitialized_fill_n(this->end(), NumInputs, *EltPtr); this->set_size(this->size() + NumInputs); } @@ -622,6 +682,12 @@ class SmallVectorImpl : public SmallVectorTemplateBase<T> { private: template <class ArgType> iterator insert_one_impl(iterator I, ArgType &&Elt) { + // Callers ensure that ArgType is derived from T. + static_assert( + std::is_same<std::remove_const_t<std::remove_reference_t<ArgType>>, + T>::value, + "ArgType must be derived from T!"); + if (I == this->end()) { // Important special case for empty vector. this->push_back(::std::forward<ArgType>(Elt)); return this->end()-1; @@ -629,14 +695,11 @@ class SmallVectorImpl : public SmallVectorTemplateBase<T> { assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds."); - // Check that adding an element won't invalidate Elt. - this->assertSafeToAdd(&Elt); - - if (this->size() >= this->capacity()) { - size_t EltNo = I-this->begin(); - this->grow(); - I = this->begin()+EltNo; - } + // Grow if necessary. + size_t Index = I - this->begin(); + std::remove_reference_t<ArgType> *EltPtr = + this->reserveForParamAndGetAddress(Elt); + I = this->begin() + Index; ::new ((void*) this->end()) T(::std::move(this->back())); // Push everything else over. @@ -644,9 +707,10 @@ class SmallVectorImpl : public SmallVectorTemplateBase<T> { this->set_size(this->size() + 1); // If we just moved the element we're inserting, be sure to update - // the reference. - std::remove_reference_t<ArgType> *EltPtr = &Elt; - if (this->isReferenceToRange(EltPtr, I, this->end())) + // the reference (never happens if TakesParamByValue). + static_assert(!TakesParamByValue || std::is_same<ArgType, T>::value, + "ArgType must be 'T' when taking by value!"); + if (!TakesParamByValue && this->isReferenceToRange(EltPtr, I, this->end())) ++EltPtr; *I = ::std::forward<ArgType>(*EltPtr); @@ -655,12 +719,14 @@ class SmallVectorImpl : public SmallVectorTemplateBase<T> { public: iterator insert(iterator I, T &&Elt) { - return insert_one_impl(I, std::move(Elt)); + return insert_one_impl(I, this->forward_value_param(std::move(Elt))); } - iterator insert(iterator I, const T &Elt) { return insert_one_impl(I, Elt); } + iterator insert(iterator I, const T &Elt) { + return insert_one_impl(I, this->forward_value_param(Elt)); + } - iterator insert(iterator I, size_type NumToInsert, const T &Elt) { + iterator insert(iterator I, size_type NumToInsert, ValueParamT Elt) { // Convert iterator to elt# to avoid invalidating iterator when we reserve() size_t InsertElt = I - this->begin(); @@ -671,11 +737,9 @@ class SmallVectorImpl : public SmallVectorTemplateBase<T> { assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds."); - // Check that adding NumToInsert elements won't invalidate Elt. - this->assertSafeToAdd(&Elt, NumToInsert); - - // Ensure there is enough space. - reserve(this->size() + NumToInsert); + // Ensure there is enough space, and get the (maybe updated) address of + // Elt. + const T *EltPtr = this->reserveForParamAndGetAddress(Elt, NumToInsert); // Uninvalidate the iterator. I = this->begin()+InsertElt; @@ -692,7 +756,12 @@ class SmallVectorImpl : public SmallVectorTemplateBase<T> { // Copy the existing elements that get replaced. std::move_backward(I, OldEnd-NumToInsert, OldEnd); - std::fill_n(I, NumToInsert, Elt); + // If we just moved the element we're inserting, be sure to update + // the reference (never happens if TakesParamByValue). + if (!TakesParamByValue && I <= EltPtr && EltPtr < this->end()) + EltPtr += NumToInsert; + + std::fill_n(I, NumToInsert, *EltPtr); return I; } @@ -705,11 +774,16 @@ class SmallVectorImpl : public SmallVectorTemplateBase<T> { size_t NumOverwritten = OldEnd-I; this->uninitialized_move(I, OldEnd, this->end()-NumOverwritten); + // If we just moved the element we're inserting, be sure to update + // the reference (never happens if TakesParamByValue). + if (!TakesParamByValue && I <= EltPtr && EltPtr < this->end()) + EltPtr += NumToInsert; + // Replace the overwritten part. - std::fill_n(I, NumOverwritten, Elt); + std::fill_n(I, NumOverwritten, *EltPtr); // Insert the non-overwritten middle part. - std::uninitialized_fill_n(OldEnd, NumToInsert-NumOverwritten, Elt); + std::uninitialized_fill_n(OldEnd, NumToInsert - NumOverwritten, *EltPtr); return I; } diff --git a/llvm/unittests/ADT/SmallVectorTest.cpp b/llvm/unittests/ADT/SmallVectorTest.cpp index d97ab577524f..1f6c7db99fa8 100644 --- a/llvm/unittests/ADT/SmallVectorTest.cpp +++ b/llvm/unittests/ADT/SmallVectorTest.cpp @@ -53,6 +53,7 @@ class Constructable { Constructable(Constructable && src) : constructed(true) { value = src.value; + src.value = 0; ++numConstructorCalls; ++numMoveConstructorCalls; } @@ -74,6 +75,7 @@ class Constructable { Constructable & operator=(Constructable && src) { EXPECT_TRUE(constructed); value = src.value; + src.value = 0; ++numAssignmentCalls; ++numMoveAssignmentCalls; return *this; @@ -1056,11 +1058,16 @@ class SmallVectorReferenceInvalidationTest : public SmallVectorTestBase { return N; } + template <class T> static bool isValueType() { + return std::is_same<T, typename VectorT::value_type>::value; + } + void SetUp() override { SmallVectorTestBase::SetUp(); // Fill up the small size so that insertions move the elements. - V.append({0, 0, 0}); + for (int I = 0, E = NumBuiltinElts(V); I != E; ++I) + V.emplace_back(I + 1); } }; @@ -1074,39 +1081,84 @@ TYPED_TEST_CASE(SmallVectorReferenceInvalidationTest, SmallVectorReferenceInvalidationTestTypes); TYPED_TEST(SmallVectorReferenceInvalidationTest, PushBack) { + // Note: setup adds [1, 2, ...] to V until it's at capacity in small mode. auto &V = this->V; - (void)V; -#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST - EXPECT_DEATH(V.push_back(V.back()), this->AssertionMessage); -#endif + int N = this->NumBuiltinElts(V); + + // Push back a reference to last element when growing from small storage. + V.push_back(V.back()); + EXPECT_EQ(N, V.back()); + + // Check that the old value is still there (not moved away). + EXPECT_EQ(N, V[V.size() - 2]); + + // Fill storage again. + V.back() = V.size(); + while (V.size() < V.capacity()) + V.push_back(V.size() + 1); + + // Push back a reference to last element when growing from large storage. + V.push_back(V.back()); + EXPECT_EQ(int(V.size()) - 1, V.back()); } TYPED_TEST(SmallVectorReferenceInvalidationTest, PushBackMoved) { + // Note: setup adds [1, 2, ...] to V until it's at capacity in small mode. auto &V = this->V; - (void)V; -#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST - EXPECT_DEATH(V.push_back(std::move(V.back())), this->AssertionMessage); -#endif + int N = this->NumBuiltinElts(V); + + // Push back a reference to last element when growing from small storage. + V.push_back(std::move(V.back())); + EXPECT_EQ(N, V.back()); + if (this->template isValueType<Constructable>()) { + // Check that the value was moved (not copied). + EXPECT_EQ(0, V[V.size() - 2]); + } + + // Fill storage again. + V.back() = V.size(); + while (V.size() < V.capacity()) + V.push_back(V.size() + 1); + + // Push back a reference to last element when growing from large storage. + V.push_back(std::move(V.back())); + + // Check the values. + EXPECT_EQ(int(V.size()) - 1, V.back()); + if (this->template isValueType<Constructable>()) { + // Check the value got moved out. + EXPECT_EQ(0, V[V.size() - 2]); + } } TYPED_TEST(SmallVectorReferenceInvalidationTest, Resize) { auto &V = this->V; (void)V; int N = this->NumBuiltinElts(V); -#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST - EXPECT_DEATH(V.resize(N + 1, V.back()), this->AssertionMessage); -#endif - - // No assertion when shrinking, since the parameter isn't accessed. - V.resize(N - 1, V.back()); + V.resize(N + 1, V.back()); + EXPECT_EQ(N, V.back()); + + // Resize to add enough elements that V will grow again. If reference + // invalidation breaks in the future, sanitizers should be able to catch a + // use-after-free here. + V.resize(V.capacity() + 1, V.front()); + EXPECT_EQ(1, V.back()); } TYPED_TEST(SmallVectorReferenceInvalidationTest, Append) { auto &V = this->V; (void)V; -#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST - EXPECT_DEATH(V.append(1, V.back()), this->AssertionMessage); -#endif + V.append(1, V.back()); + int N = this->NumBuiltinElts(V); + EXPECT_EQ(N, V[N - 1]); + + // Append enough more elements that V will grow again. This tests growing + // when already in large mode. + // + // If reference invalidation breaks in the future, sanitizers should be able + // to catch a use-after-free here. + V.append(V.capacity() - V.size() + 1, V.front()); + EXPECT_EQ(1, V.back()); } TYPED_TEST(SmallVectorReferenceInvalidationTest, AppendRange) { @@ -1150,28 +1202,72 @@ TYPED_TEST(SmallVectorReferenceInvalidationTest, AssignRange) { } TYPED_TEST(SmallVectorReferenceInvalidationTest, Insert) { + // Note: setup adds [1, 2, ...] to V until it's at capacity in small mode. auto &V = this->V; (void)V; -#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST - EXPECT_DEATH(V.insert(V.begin(), V.back()), this->AssertionMessage); -#endif + + // Insert a reference to the back (not at end() or else insert delegates to + // push_back()), growing out of small mode. Confirm the value was copied out + // (moving out Constructable sets it to 0). + V.insert(V.begin(), V.back()); + EXPECT_EQ(int(V.size() - 1), V.front()); + EXPECT_EQ(int(V.size() - 1), V.back()); + + // Fill up the vector again. + while (V.size() < V.capacity()) + V.push_back(V.size() + 1); + + // Grow again from large storage to large storage. + V.insert(V.begin(), V.back()); + EXPECT_EQ(int(V.size() - 1), V.front()); + EXPECT_EQ(int(V.size() - 1), V.back()); } TYPED_TEST(SmallVectorReferenceInvalidationTest, InsertMoved) { + // Note: setup adds [1, 2, ...] to V until it's at capacity in small mode. auto &V = this->V; (void)V; -#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST - EXPECT_DEATH(V.insert(V.begin(), std::move(V.back())), - this->AssertionMessage); -#endif + + // Insert a reference to the back (not at end() or else insert delegates to + // push_back()), growing out of small mode. Confirm the value was copied out + // (moving out Constructable sets it to 0). + V.insert(V.begin(), std::move(V.back())); + EXPECT_EQ(int(V.size() - 1), V.front()); + if (this->template isValueType<Constructable>()) { + // Check the value got moved out. + EXPECT_EQ(0, V.back()); + } + + // Fill up the vector again. + while (V.size() < V.capacity()) + V.push_back(V.size() + 1); + + // Grow again from large storage to large storage. + V.insert(V.begin(), std::move(V.back())); + EXPECT_EQ(int(V.size() - 1), V.front()); + if (this->template isValueType<Constructable>()) { + // Check the value got moved out. + EXPECT_EQ(0, V.back()); + } } TYPED_TEST(SmallVectorReferenceInvalidationTest, InsertN) { auto &V = this->V; (void)V; -#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST - EXPECT_DEATH(V.insert(V.begin(), 2, V.back()), this->AssertionMessage); -#endif + + // Cover NumToInsert <= this->end() - I. + V.insert(V.begin() + 1, 1, V.back()); + int N = this->NumBuiltinElts(V); + EXPECT_EQ(N, V[1]); + + // Cover NumToInsert > this->end() - I, inserting enough elements that V will + // also grow again; V.capacity() will be more elements than necessary but + // it's a simple way to cover both conditions. + // + // If reference invalidation breaks in the future, sanitizers should be able + // to catch a use-after-free here. + V.insert(V.begin(), V.capacity(), V.front()); + EXPECT_EQ(1, V.front()); } TYPED_TEST(SmallVectorReferenceInvalidationTest, InsertRange) { _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits