[ 
https://issues.apache.org/jira/browse/ARROW-1878?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16276107#comment-16276107
 ] 

ASF GitHub Bot commented on ARROW-1878:
---------------------------------------

wesm closed pull request #1381: ARROW-1878: [GLib] Add 
garrow_array_dictionary_encode()
URL: https://github.com/apache/arrow/pull/1381
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/c_glib/arrow-glib/basic-array.cpp b/c_glib/arrow-glib/basic-array.cpp
index a7afaca95..3c027c0e1 100644
--- a/c_glib/arrow-glib/basic-array.cpp
+++ b/c_glib/arrow-glib/basic-array.cpp
@@ -596,6 +596,44 @@ garrow_array_unique(GArrowArray *array,
   return garrow_array_new_raw(&arrow_unique_array);
 }
 
+/**
+ * garrow_array_dictionary_encode:
+ * @array: A #GArrowArray.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (nullable) (transfer full):
+ *   A newly created #GArrowDictionaryArray for the @array on success,
+ *   %NULL on error.
+ *
+ * Since: 0.8.0
+ */
+GArrowArray *
+garrow_array_dictionary_encode(GArrowArray *array,
+                               GError **error)
+{
+  auto arrow_array = garrow_array_get_raw(array);
+  auto memory_pool = arrow::default_memory_pool();
+  arrow::compute::FunctionContext context(memory_pool);
+  arrow::compute::Datum dictionary_encoded_datum;
+  auto status =
+    arrow::compute::DictionaryEncode(&context,
+                                     arrow::compute::Datum(arrow_array),
+                                     &dictionary_encoded_datum);
+  if (!status.ok()) {
+    std::stringstream message;
+    message << "[array][dictionary-encode] <";
+    message << arrow_array->type()->ToString();
+    message << ">";
+    garrow_error_check(error, status, message.str().c_str());
+    return NULL;
+  }
+
+  auto arrow_dictionary_encoded_array =
+    arrow::MakeArray(dictionary_encoded_datum.array());
+
+  return garrow_array_new_raw(&arrow_dictionary_encoded_array);
+}
+
 
 G_DEFINE_TYPE(GArrowNullArray,               \
               garrow_null_array,             \
diff --git a/c_glib/arrow-glib/basic-array.h b/c_glib/arrow-glib/basic-array.h
index 613a59b07..420c30ecc 100644
--- a/c_glib/arrow-glib/basic-array.h
+++ b/c_glib/arrow-glib/basic-array.h
@@ -68,6 +68,8 @@ GArrowArray   *garrow_array_cast        (GArrowArray *array,
                                          GError **error);
 GArrowArray   *garrow_array_unique      (GArrowArray *array,
                                          GError **error);
+GArrowArray   *garrow_array_dictionary_encode(GArrowArray *array,
+                                              GError **error);
 
 #define GARROW_TYPE_NULL_ARRAY                  \
   (garrow_null_array_get_type())
diff --git a/c_glib/test/test-dictionary-encode.rb b/c_glib/test/test-dictionary-encode.rb
new file mode 100644
index 000000000..ea77be64a
--- /dev/null
+++ b/c_glib/test/test-dictionary-encode.rb
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+class TestDictionaryEncode < Test::Unit::TestCase
+  include Helper::Buildable
+  include Helper::Omittable
+
+  def test_int32
+    array = build_int32_array([1, 3, 1, -1, -3, -1])
+    assert_equal(<<-STRING.chomp, array.dictionary_encode.to_s)
+
+-- is_valid: all not null
+-- dictionary: [1, 3, -1, -3]
+-- indices: [0, 1, 0, 2, 3, 2]
+    STRING
+  end
+
+  def test_string
+    array = build_string_array(["Ruby", "Python", "Ruby"])
+    assert_equal(<<-STRING.chomp, array.dictionary_encode.to_s)
+
+-- is_valid: all not null
+-- dictionary: ["Ruby", "Python"]
+-- indices: [0, 1, 0]
+    STRING
+  end
+end


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


> [GLib] Add garrow_array_dictionary_encode()
> -------------------------------------------
>
>                 Key: ARROW-1878
>                 URL: https://issues.apache.org/jira/browse/ARROW-1878
>             Project: Apache Arrow
>          Issue Type: New Feature
>          Components: GLib
>            Reporter: Kouhei Sutou
>            Assignee: Kouhei Sutou
>              Labels: pull-request-available
>             Fix For: 0.8.0
>
>




--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

Reply via email to