bkietz commented on a change in pull request #10915: URL: https://github.com/apache/arrow/pull/10915#discussion_r687694889
########## File path: cpp/src/arrow/util/small_vector.h ########## @@ -0,0 +1,509 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <initializer_list> +#include <limits> +#include <new> +#include <type_traits> +#include <utility> + +#include "arrow/util/macros.h" + +namespace arrow { +namespace internal { + +#if __cplusplus >= 201703L +using std::launder; +#else +// TODO factor out from result.h + +// template <class T> +// constexpr T* launder(T* p) noexcept { +// return p; +// } +#endif + +template <typename ValueType, typename PointerType, typename ReferenceType> +class VectorIterator { + public: + using value_type = ValueType; + using pointer = PointerType; + using reference = ReferenceType; + using difference_type = ptrdiff_t; + using iterator_category = std::random_access_iterator_tag; + + // Some algorithms need to default-construct an iterator + constexpr VectorIterator() noexcept = default; + + constexpr explicit VectorIterator(pointer ptr) noexcept : ptr_(ptr) {} + + // Value access + constexpr reference operator*() const { return *ptr_; } + + constexpr reference operator[](difference_type n) const { return ptr_[n]; } 
+ + // Forward / backward + VectorIterator& operator++() { + ++ptr_; + return *this; + } + VectorIterator& operator--() { + --ptr_; + return *this; + } + VectorIterator operator++(int) { + VectorIterator tmp(*this); + ++ptr_; + return tmp; + } + VectorIterator operator--(int) { + VectorIterator tmp(*this); + --ptr_; + return tmp; + } + + // Arithmetic + constexpr difference_type operator-(const VectorIterator& other) const { + return ptr_ - other.ptr_; + } + constexpr VectorIterator operator+(difference_type n) const { + return VectorIterator(ptr_ + n); + } + constexpr VectorIterator operator-(difference_type n) const { + return VectorIterator(ptr_ - n); + } + VectorIterator& operator+=(difference_type n) { + ptr_ += n; + return *this; + } + VectorIterator& operator-=(difference_type n) { + ptr_ -= n; + return *this; + } + + // Comparisons + constexpr bool operator==(const VectorIterator& other) const { + return ptr_ == other.ptr_; + } + constexpr bool operator!=(const VectorIterator& other) const { + return ptr_ != other.ptr_; + } + constexpr bool operator<(const VectorIterator& other) const { + return ptr_ < other.ptr_; + } + constexpr bool operator>(const VectorIterator& other) const { + return ptr_ > other.ptr_; + } + constexpr bool operator<=(const VectorIterator& other) const { + return ptr_ <= other.ptr_; + } + constexpr bool operator>=(const VectorIterator& other) const { + return ptr_ >= other.ptr_; + } + + private: + pointer ptr_ = NULLPTR; +}; + +template <typename T> +class StaticVectorMixin { + protected: + // properly aligned uninitialized storage for N T's + using storage_type = typename std::aligned_storage<sizeof(T), alignof(T)>::type; + + static T* ptr_at(storage_type* p, size_t i) { + return launder(reinterpret_cast<T*>(&p[i])); + } + + static const T* ptr_at(const storage_type* p, size_t i) { + return launder(reinterpret_cast<const T*>(&p[i])); + } + + static void move_storage(storage_type* src, storage_type* dest, size_t n) { + for (size_t i = 
0; i < n; ++i) { + T* src_item = ptr_at(src, i); + T* dest_item = ptr_at(dest, i); + new (dest_item) T(std::move(*src_item)); + src_item->~T(); + } + } + + static void destroy_storage(storage_type* p, size_t n) { + for (size_t i = 0; i < n; ++i) { + ptr_at(p, i)->~T(); + } + } +}; + +template <typename T, size_t N, bool NonTrivialDestructor> +class StaticVectorBaseStorage : public StaticVectorMixin<T> { Review comment: Could we avoid inheritance here? ########## File path: cpp/src/arrow/util/small_vector.h ########## @@ -0,0 +1,509 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <initializer_list> +#include <limits> +#include <new> +#include <type_traits> +#include <utility> + +#include "arrow/util/macros.h" + +namespace arrow { +namespace internal { + +#if __cplusplus >= 201703L +using std::launder; +#else +// TODO factor out from result.h + +// template <class T> +// constexpr T* launder(T* p) noexcept { +// return p; +// } +#endif + +template <typename ValueType, typename PointerType, typename ReferenceType> +class VectorIterator { + public: + using value_type = ValueType; + using pointer = PointerType; + using reference = ReferenceType; + using difference_type = ptrdiff_t; + using iterator_category = std::random_access_iterator_tag; + + // Some algorithms need to default-construct an iterator + constexpr VectorIterator() noexcept = default; + + constexpr explicit VectorIterator(pointer ptr) noexcept : ptr_(ptr) {} + + // Value access + constexpr reference operator*() const { return *ptr_; } + + constexpr reference operator[](difference_type n) const { return ptr_[n]; } + + // Forward / backward + VectorIterator& operator++() { + ++ptr_; + return *this; + } + VectorIterator& operator--() { + --ptr_; + return *this; + } + VectorIterator operator++(int) { + VectorIterator tmp(*this); + ++ptr_; + return tmp; + } + VectorIterator operator--(int) { + VectorIterator tmp(*this); + --ptr_; + return tmp; + } + + // Arithmetic + constexpr difference_type operator-(const VectorIterator& other) const { + return ptr_ - other.ptr_; + } + constexpr VectorIterator operator+(difference_type n) const { + return VectorIterator(ptr_ + n); + } + constexpr VectorIterator operator-(difference_type n) const { + return VectorIterator(ptr_ - n); + } + VectorIterator& operator+=(difference_type n) { + ptr_ += n; + return *this; + } + VectorIterator& operator-=(difference_type n) { + ptr_ -= n; + return *this; + } + + // Comparisons + constexpr bool operator==(const 
VectorIterator& other) const { + return ptr_ == other.ptr_; + } + constexpr bool operator!=(const VectorIterator& other) const { + return ptr_ != other.ptr_; + } + constexpr bool operator<(const VectorIterator& other) const { + return ptr_ < other.ptr_; + } + constexpr bool operator>(const VectorIterator& other) const { + return ptr_ > other.ptr_; + } + constexpr bool operator<=(const VectorIterator& other) const { + return ptr_ <= other.ptr_; + } + constexpr bool operator>=(const VectorIterator& other) const { + return ptr_ >= other.ptr_; + } + + private: + pointer ptr_ = NULLPTR; +}; + +template <typename T> +class StaticVectorMixin { + protected: + // properly aligned uninitialized storage for N T's + using storage_type = typename std::aligned_storage<sizeof(T), alignof(T)>::type; + + static T* ptr_at(storage_type* p, size_t i) { + return launder(reinterpret_cast<T*>(&p[i])); + } + + static const T* ptr_at(const storage_type* p, size_t i) { + return launder(reinterpret_cast<const T*>(&p[i])); + } + + static void move_storage(storage_type* src, storage_type* dest, size_t n) { + for (size_t i = 0; i < n; ++i) { + T* src_item = ptr_at(src, i); + T* dest_item = ptr_at(dest, i); + new (dest_item) T(std::move(*src_item)); + src_item->~T(); + } + } + + static void destroy_storage(storage_type* p, size_t n) { + for (size_t i = 0; i < n; ++i) { + ptr_at(p, i)->~T(); + } + } +}; + +template <typename T, size_t N, bool NonTrivialDestructor> +class StaticVectorBaseStorage : public StaticVectorMixin<T> { + protected: + using typename StaticVectorMixin<T>::storage_type; + + storage_type static_data_[N]; + size_t size_ = 0; + + void destroy() {} +}; + +template <typename T, size_t N> +class StaticVectorBaseStorage<T, N, true> : public StaticVectorMixin<T> { + protected: + using typename StaticVectorMixin<T>::storage_type; + + storage_type static_data_[N]; + size_t size_ = 0; + + ~StaticVectorBaseStorage() noexcept { destroy(); } + + void destroy() noexcept { 
this->destroy_storage(static_data_, size_); } +}; + +template <typename T, size_t N, bool D = !std::is_trivially_destructible<T>::value> +class StaticVectorBase : public StaticVectorBaseStorage<T, N, D> { + protected: + using Base = StaticVectorBaseStorage<T, N, D>; + using typename Base::storage_type; + + using Base::size_; + using Base::static_data_; + + StaticVectorBase() noexcept = default; + + storage_type* data_ptr() { return static_data_; } + + constexpr const storage_type* const_data_ptr() const { return static_data_; } + + void bump_size(size_t addend) { + assert(size_ + addend <= N); + size_ += addend; + } + + void reduce_size(size_t reduce_by) { + assert(reduce_by <= size_); + size_ -= reduce_by; + } + + void move_from(StaticVectorBase&& other) noexcept { + size_ = other.size_; + this->move_storage(other.static_data_, static_data_, size_); + other.size_ = 0; + } + + public: + constexpr size_t capacity() const { return N; } + + constexpr size_t max_size() const { return N; } + + void reserve(size_t n) {} + + void clear() { + this->destroy_storage(static_data_, size_); + size_ = 0; + } +}; + +template <typename T, size_t N> +class SmallVectorBase : public StaticVectorMixin<T> { + protected: + using typename StaticVectorMixin<T>::storage_type; + + storage_type static_data_[N]; + size_t size_ = 0; + storage_type* data_ = static_data_; + size_t dynamic_capacity_ = 0; Review comment: This uses 24 bytes in addition to the static elements. I think it'd be worthwhile to compress this further. 
Here's a sketch which uses a minimum of 16 bytes (for example, `SmallVector<int32_t, 3>` would be only 16 bytes) ```c++ template <typename T, size_t N> struct LittleEndianState { using Storage = StaticVectorMixin<T>; using storage_type = typename Storage::storage_type; struct DynamicState { // low bit will be zero for alignment T* data_; uint32_t size_, capacity_; }; struct SmallState { // low bit will be one for discrimination uint8_t shifted_size_; storage_type storage_[N]; size_t size() const { return shifted_size_ >> 1; } }; union { // low bit will be set if small uint8_t discriminant_; DynamicState dynamic_; SmallState small_; }; bool is_small() const { return discriminant_ & 1; } size_t size() const { return is_small() ? small_.size() : dynamic_.size_; } }; template <typename T, size_t N> struct BigEndianState { using Storage = StaticVectorMixin<T>; using storage_type = typename Storage::storage_type; struct DynamicState { // high bit will be zero for discrimination uint32_t size_, capacity_; T* data_; }; struct SmallState { // high bit will be one for discrimination uint8_t shifted_size_; storage_type storage_[N]; size_t size() const { return shifted_size_ & 0b0111'1111; } }; union { // high bit will be set if small uint8_t discriminant_; DynamicState dynamic_; SmallState small_; }; bool is_small() const { return discriminant_ & 0b1000'0000; } size_t size() const { return is_small() ? small_.size() : dynamic_.size_; } }; template <typename T, size_t N> struct SmallVector { #ifdef BIGENDIAN using State = BigEndianState<T, N>; #else using State = LittleEndianState<T, N>; #endif State state_; using Storage = StaticVectorMixin<T>; using storage_type = typename Storage::storage_type; T* data() { return state_.is_small() ? Storage::ptr_at(state_.small_.storage_, 0) : state_.dynamic_.data_; } ~SmallVector() { if (state_.is_small()) { Storage::destroy_storage(state_.small_.storage_, state_.small_.size()); } else { delete[] state_.dynamic_.data_; } } // ... 
}; ``` ########## File path: cpp/src/arrow/util/small_vector.h ########## @@ -0,0 +1,509 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <initializer_list> +#include <limits> +#include <new> +#include <type_traits> +#include <utility> + +#include "arrow/util/macros.h" + +namespace arrow { +namespace internal { + +#if __cplusplus >= 201703L +using std::launder; +#else +// TODO factor out from result.h + +// template <class T> +// constexpr T* launder(T* p) noexcept { +// return p; +// } +#endif + +template <typename ValueType, typename PointerType, typename ReferenceType> +class VectorIterator { Review comment: I'm not sure this is worthwhile; `std::iterator_traits` is already specialized for `int*` and `const int*` so I think you could just `using iterator = int*; using const_iterator = const int*;` below -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org