[ 
https://issues.apache.org/jira/browse/ARROW-1882?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16279247#comment-16279247
 ] 

ASF GitHub Bot commented on ARROW-1882:
---------------------------------------

wesm commented on a change in pull request #1388: ARROW-1882: [C++] Reintroduce DictionaryBuilder
URL: https://github.com/apache/arrow/pull/1388#discussion_r155085087
 
 

 ##########
 File path: cpp/src/arrow/array-test.cc
 ##########
 @@ -1619,6 +1619,353 @@ TEST_F(TestAdaptiveUIntBuilder, TestAppendVector) {
   ASSERT_TRUE(expected_->Equals(result_));
 }
 
+// ----------------------------------------------------------------------
+// Dictionary tests
+
+template <typename Type>
+class TestDictionaryBuilder : public TestBuilder {};
+
+typedef ::testing::Types<Int8Type, UInt8Type, Int16Type, UInt16Type, Int32Type,
+                         UInt32Type, Int64Type, UInt64Type, FloatType, DoubleType>
+    PrimitiveDictionaries;
+
+TYPED_TEST_CASE(TestDictionaryBuilder, PrimitiveDictionaries);
+
+TYPED_TEST(TestDictionaryBuilder, Basic) {
+  DictionaryBuilder<TypeParam> builder(default_memory_pool());
+  ASSERT_OK(builder.Append(static_cast<typename TypeParam::c_type>(1)));
+  ASSERT_OK(builder.Append(static_cast<typename TypeParam::c_type>(2)));
+  ASSERT_OK(builder.Append(static_cast<typename TypeParam::c_type>(1)));
+
+  std::shared_ptr<Array> result;
+  ASSERT_OK(builder.Finish(&result));
+
+  // Build expected data
+  NumericBuilder<TypeParam> dict_builder;
+  ASSERT_OK(dict_builder.Append(static_cast<typename TypeParam::c_type>(1)));
+  ASSERT_OK(dict_builder.Append(static_cast<typename TypeParam::c_type>(2)));
+  std::shared_ptr<Array> dict_array;
+  ASSERT_OK(dict_builder.Finish(&dict_array));
+  auto dtype = std::make_shared<DictionaryType>(int8(), dict_array);
+
+  Int8Builder int_builder;
+  ASSERT_OK(int_builder.Append(0));
+  ASSERT_OK(int_builder.Append(1));
+  ASSERT_OK(int_builder.Append(0));
+  std::shared_ptr<Array> int_array;
+  ASSERT_OK(int_builder.Finish(&int_array));
+
+  DictionaryArray expected(dtype, int_array);
+  ASSERT_TRUE(expected.Equals(result));
+}
+
+TYPED_TEST(TestDictionaryBuilder, ArrayConversion) {
+  NumericBuilder<TypeParam> builder;
+  // DictionaryBuilder<TypeParam> builder;
+  ASSERT_OK(builder.Append(static_cast<typename TypeParam::c_type>(1)));
+  ASSERT_OK(builder.Append(static_cast<typename TypeParam::c_type>(2)));
+  ASSERT_OK(builder.Append(static_cast<typename TypeParam::c_type>(1)));
+
+  std::shared_ptr<Array> intermediate_result;
+  ASSERT_OK(builder.Finish(&intermediate_result));
+  DictionaryBuilder<TypeParam> dictionary_builder(default_memory_pool());
+  ASSERT_OK(dictionary_builder.AppendArray(*intermediate_result));
 
 Review comment:
   We might consider removing these `AppendArray` methods in favor of the kernel-based approach.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> [C++] Reintroduce DictionaryBuilder
> -----------------------------------
>
>                 Key: ARROW-1882
>                 URL: https://issues.apache.org/jira/browse/ARROW-1882
>             Project: Apache Arrow
>          Issue Type: Bug
>          Components: C++
>            Reporter: Uwe L. Korn
>            Assignee: Uwe L. Korn
>            Priority: Critical
>              Labels: pull-request-available
>             Fix For: 0.8.0
>
>
> We need the {{DictionaryBuilder}} to incrementally build Arrow Arrays of
> {{DictionaryType}}. The kernels only support en bloc conversion of whole
> Arrays, which results in higher memory usage.



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

Reply via email to