Repository: arrow Updated Branches: refs/heads/master a3514a388 -> 5abd12321
ARROW-1468: [C++] Add primitive Append variants that accept std::vector<T> Having to always unbox std::vector in user code is a rough edge in practice -- this makes things simpler in third party applications. Author: Wes McKinney <[email protected]> Closes #1053 from wesm/ARROW-1468 and squashes the following commits: 2546d8fe [Wes McKinney] Add primitive Append variants that accept std::vector<T> Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/5abd1232 Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/5abd1232 Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/5abd1232 Branch: refs/heads/master Commit: 5abd12321e5730ffd8e16207d86c479f5eb362a5 Parents: a3514a3 Author: Wes McKinney <[email protected]> Authored: Wed Sep 6 07:36:09 2017 -0400 Committer: Wes McKinney <[email protected]> Committed: Wed Sep 6 07:36:09 2017 -0400 ---------------------------------------------------------------------- cpp/src/arrow/array-test.cc | 19 +++++++++++++++++-- cpp/src/arrow/builder.cc | 34 ++++++++++++++++++++++++++++++++++ cpp/src/arrow/builder.h | 29 +++++++++++++++++++++++++++++ 3 files changed, 80 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/5abd1232/cpp/src/arrow/array-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc index a73a73e..5d9eb18 100644 --- a/cpp/src/arrow/array-test.cc +++ b/cpp/src/arrow/array-test.cc @@ -614,22 +614,32 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendVectorStdBool) { is_valid.push_back(this->valid_bytes_[i] != 0); } ASSERT_OK(this->builder_->Append(draws.data(), K, is_valid)); + ASSERT_OK(this->builder_nn_->Append(draws.data(), K)); ASSERT_EQ(1000, this->builder_->length()); ASSERT_EQ(1024, this->builder_->capacity()); + ASSERT_EQ(1000, this->builder_nn_->length()); + ASSERT_EQ(1024, this->builder_nn_->capacity()); // Append the next 9000 is_valid.clear(); + std::vector<T> partial_draws; for (int64_t i = K; i < size; ++i) { + partial_draws.push_back(draws[i]); is_valid.push_back(this->valid_bytes_[i] != 0); } - ASSERT_OK(this->builder_->Append(draws.data() + K, size - K, is_valid)); + ASSERT_OK(this->builder_->Append(partial_draws, is_valid)); + ASSERT_OK(this->builder_nn_->Append(partial_draws)); ASSERT_EQ(size, this->builder_->length()); ASSERT_EQ(BitUtil::NextPower2(size), this->builder_->capacity()); + ASSERT_EQ(size, this->builder_nn_->length()); + ASSERT_EQ(BitUtil::NextPower2(size), this->builder_->capacity()); + this->Check(this->builder_, true); + this->Check(this->builder_nn_, false); } TYPED_TEST(TestPrimitiveBuilder, TestAdvance) { @@ -671,6 +681,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestReserve) { TEST(TestBooleanBuilder, TestStdBoolVectorAppend) { BooleanBuilder builder; + BooleanBuilder builder_nn; std::vector<bool> values, is_valid; @@ -686,12 +697,15 @@ TEST(TestBooleanBuilder, TestStdBoolVectorAppend) { chunk_is_valid.push_back(is_valid[i]); } ASSERT_OK(builder.Append(chunk_values, chunk_is_valid)); + ASSERT_OK(builder_nn.Append(chunk_values)); } - std::shared_ptr<Array> result; + std::shared_ptr<Array> result, result_nn; ASSERT_OK(builder.Finish(&result)); + ASSERT_OK(builder_nn.Finish(&result_nn)); const auto& arr = static_cast<const BooleanArray&>(*result); + const auto& arr_nn = static_cast<const BooleanArray&>(*result_nn); for (int i = 0; i < length; ++i) { if (is_valid[i]) { ASSERT_FALSE(arr.IsNull(i)); @@ -699,6 +713,7 @@ TEST(TestBooleanBuilder, TestStdBoolVectorAppend) { } else { ASSERT_TRUE(arr.IsNull(i)); } + ASSERT_EQ(values[i], arr_nn.Value(i)); } } http://git-wip-us.apache.org/repos/asf/arrow/blob/5abd1232/cpp/src/arrow/builder.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc index 0129dc8..7966241 100644 --- a/cpp/src/arrow/builder.cc +++ b/cpp/src/arrow/builder.cc @@ -292,6 +292,17 @@ Status PrimitiveBuilder<T>::Append(const value_type* values, int64_t length, } template <typename T> +Status PrimitiveBuilder<T>::Append(const std::vector<value_type>& values, + const std::vector<bool>& is_valid) { + return Append(values.data(), static_cast<int64_t>(values.size()), is_valid); +} + +template <typename T> +Status PrimitiveBuilder<T>::Append(const std::vector<value_type>& values) { + return Append(values.data(), static_cast<int64_t>(values.size())); +} + +template <typename T> Status PrimitiveBuilder<T>::Finish(std::shared_ptr<Array>* out) { const int64_t bytes_required = TypeTraits<T>::bytes_required(length_); if (bytes_required > 0 && bytes_required < data_->size()) { @@ -750,6 +761,7 @@ Status BooleanBuilder::Append(const uint8_t* values, int64_t length, Status BooleanBuilder::Append(const uint8_t* values, int64_t length, const std::vector<bool>& is_valid) { RETURN_NOT_OK(Reserve(length)); + DCHECK_EQ(length, static_cast<int64_t>(is_valid.size())); for (int64_t i = 0; i < length; ++i) { BitUtil::SetBitTo(raw_data_, length_ + i, values[i] != 0); @@ -760,10 +772,20 @@ Status BooleanBuilder::Append(const uint8_t* values, int64_t length, return Status::OK(); } +Status BooleanBuilder::Append(const std::vector<uint8_t>& values, + const std::vector<bool>& is_valid) { + return Append(values.data(), static_cast<int64_t>(values.size()), is_valid); +} + +Status BooleanBuilder::Append(const std::vector<uint8_t>& values) { + return Append(values.data(), static_cast<int64_t>(values.size())); +} + Status BooleanBuilder::Append(const std::vector<bool>& values, const std::vector<bool>& is_valid) { const int64_t length = static_cast<int64_t>(values.size()); RETURN_NOT_OK(Reserve(length)); + DCHECK_EQ(length, static_cast<int64_t>(is_valid.size())); for (int64_t i = 0; i < length; ++i) { BitUtil::SetBitTo(raw_data_, length_ + i, values[i]); @@ -774,6 +796,18 @@ Status BooleanBuilder::Append(const std::vector<bool>& values, return Status::OK(); } +Status BooleanBuilder::Append(const std::vector<bool>& values) { + const int64_t length = static_cast<int64_t>(values.size()); + RETURN_NOT_OK(Reserve(length)); + + for (int64_t i = 0; i < length; ++i) { + BitUtil::SetBitTo(raw_data_, length_ + i, values[i]); + } + + ArrayBuilder::UnsafeSetNotNull(length); + return Status::OK(); +} + // ---------------------------------------------------------------------- // DictionaryBuilder http://git-wip-us.apache.org/repos/asf/arrow/blob/5abd1232/cpp/src/arrow/builder.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h index a99e682..3e8289f 100644 --- a/cpp/src/arrow/builder.h +++ b/cpp/src/arrow/builder.h @@ -220,6 +220,18 @@ class ARROW_EXPORT PrimitiveBuilder : public ArrayBuilder { Status Append(const value_type* values, int64_t length, const std::vector<bool>& is_valid); + /// \brief Append a sequence of elements in one shot + /// \param[in] values a std::vector of values + /// \param[in] is_valid an std::vector<bool> indicating valid (1) or null + /// (0). Equal in length to values + /// \return Status + Status Append(const std::vector<value_type>& values, const std::vector<bool>& is_valid); + + /// \brief Append a sequence of elements in one shot + /// \param[in] values a std::vector of values + /// \return Status + Status Append(const std::vector<value_type>& values); + Status Finish(std::shared_ptr<Array>* out) override; Status Init(int64_t capacity) override; @@ -556,12 +568,29 @@ class ARROW_EXPORT BooleanBuilder : public ArrayBuilder { Status Append(const uint8_t* values, int64_t length, const std::vector<bool>& is_valid); /// \brief Append a sequence of elements in one shot + /// \param[in] values a std::vector of bytes + /// \param[in] is_valid an std::vector<bool> indicating valid (1) or null + /// (0). Equal in length to values + /// \return Status + Status Append(const std::vector<uint8_t>& values, const std::vector<bool>& is_valid); + + /// \brief Append a sequence of elements in one shot + /// \param[in] values a std::vector of bytes + /// \return Status + Status Append(const std::vector<uint8_t>& values); + + /// \brief Append a sequence of elements in one shot /// \param[in] values an std::vector<bool> indicating true (1) or false /// \param[in] is_valid an std::vector<bool> indicating valid (1) or null /// (0). Equal in length to values /// \return Status Status Append(const std::vector<bool>& values, const std::vector<bool>& is_valid); + /// \brief Append a sequence of elements in one shot + /// \param[in] values an std::vector<bool> indicating true (1) or false + /// \return Status + Status Append(const std::vector<bool>& values); + Status Finish(std::shared_ptr<Array>* out) override; Status Init(int64_t capacity) override;
