[ 
https://issues.apache.org/jira/browse/ARROW-2351?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16423693#comment-16423693
 ] 

ASF GitHub Bot commented on ARROW-2351:
---------------------------------------

xhochy closed pull request #1803: ARROW-2351 [C++] 
StringBuilder::append(vector<string>...) not implemented
URL: https://github.com/apache/arrow/pull/1803
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc
index 2aa73a09a..308bbcd8a 100644
--- a/cpp/src/arrow/array-test.cc
+++ b/cpp/src/arrow/array-test.cc
@@ -989,6 +989,39 @@ TEST_F(TestStringBuilder, TestScalarAppend) {
   }
 }
 
+TEST_F(TestStringBuilder, TestAppendVector) {
+  vector<string> strings = {"", "bb", "a", "", "ccc"};
+  vector<uint8_t> is_null = {0, 0, 0, 1, 0};
+
+  int N = static_cast<int>(strings.size());
+  int reps = 1000;
+
+  for (int j = 0; j < reps; ++j) {
+    ASSERT_OK(builder_->Append(strings, is_null.data()));
+  }
+  Done();
+
+  ASSERT_EQ(reps * N, result_->length());
+  ASSERT_EQ(reps, result_->null_count());
+  ASSERT_EQ(reps * 6, result_->value_data()->size());
+
+  int32_t length;
+  int32_t pos = 0;
+  for (int i = 0; i < N * reps; ++i) {
+    if (is_null[i % N]) {
+      ASSERT_TRUE(result_->IsNull(i));
+    } else {
+      ASSERT_FALSE(result_->IsNull(i));
+      result_->GetValue(i, &length);
+      ASSERT_EQ(pos, result_->value_offset(i));
+      ASSERT_EQ(static_cast<int>(strings[i % N].size()), length);
+      ASSERT_EQ(strings[i % N], result_->GetString(i));
+
+      pos += length;
+    }
+  }
+}
+
 TEST_F(TestStringBuilder, TestZeroLength) {
   // All buffers are null
   Done();
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index aa9f3ce42..ec486566f 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -16,11 +16,11 @@
 // under the License.
 
 #include "arrow/builder.h"
-
 #include <algorithm>
 #include <cstdint>
 #include <cstring>
 #include <limits>
+#include <numeric>
 #include <sstream>
 #include <utility>
 #include <vector>
@@ -1385,6 +1385,28 @@ const uint8_t* BinaryBuilder::GetValue(int64_t i, int32_t* out_length) const {
 
 StringBuilder::StringBuilder(MemoryPool* pool) : BinaryBuilder(utf8(), pool) {}
 
+Status StringBuilder::Append(const std::vector<std::string>& values,
+                             uint8_t* null_bytes) {
+  std::size_t total_length = std::accumulate(
+      values.begin(), values.end(), 0ULL,
+      [](uint64_t sum, const std::string& str) { return sum + str.size(); });
+  RETURN_NOT_OK(Reserve(values.size()));
+  RETURN_NOT_OK(value_data_builder_.Reserve(total_length));
+  RETURN_NOT_OK(offsets_builder_.Reserve(values.size()));
+
+  for (std::size_t i = 0; i < values.size(); ++i) {
+    RETURN_NOT_OK(AppendNextOffset());
+    if (null_bytes[i]) {
+      UnsafeAppendToBitmap(false);
+    } else {
+      RETURN_NOT_OK(value_data_builder_.Append(
+          reinterpret_cast<const uint8_t*>(values[i].data()), values[i].size()));
+      UnsafeAppendToBitmap(true);
+    }
+  }
+  return Status::OK();
+}
+
 // ----------------------------------------------------------------------
 // Fixed width binary
 


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> [C++] StringBuilder::append(vector<string>...) not implemented
> --------------------------------------------------------------
>
>                 Key: ARROW-2351
>                 URL: https://issues.apache.org/jira/browse/ARROW-2351
>             Project: Apache Arrow
>          Issue Type: Bug
>          Components: C++
>    Affects Versions: 0.9.0
>            Reporter: Rares Vernica
>            Priority: Major
>              Labels: pull-request-available
>             Fix For: 0.10.0
>
>
> For {{StringBuilder}} an {{append(vector<string>, uint8_t*)}} function is 
> [declared|https://github.com/apache/arrow/blob/7b2c79765cf92760e1f8cca079159d9613b86412/cpp/src/arrow/builder.h#L721]
>  and 
> [documented|http://arrow.apache.org/docs/cpp/classarrow_1_1_string_builder.html#a59be34b5e11017a392b4ee019d90da3c]
>  but it does not seem to be implemented.
> {code:java}
> undefined reference to `arrow::StringBuilder::Append(std::vector<std::string, 
> std::allocator<std::string> > const&, unsigned char*)'
> collect2: error: ld returned 1 exit status
> {code}
> Also worth noting is that the similar function in {{NumericBuilder}} uses 
> {{vector<bool>}} for the null values instead of {{uint8_t*}}. It might be 
> worth making them consistent.
>  



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to