westonpace commented on code in PR #35356:
URL: https://github.com/apache/arrow/pull/35356#discussion_r1260347082


##########
cpp/src/arrow/compute/dictionary_decode.cc:
##########
@@ -0,0 +1,95 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/dictionary_decode.h"
+
+#include <mutex>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "arrow/compute/cast.h"
+#include "arrow/compute/cast_internal.h"
+#include "arrow/compute/exec.h"
+#include "arrow/compute/function_internal.h"
+#include "arrow/compute/kernel.h"
+#include "arrow/compute/kernels/codegen_internal.h"
+#include "arrow/compute/registry.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/reflection_internal.h"
+
+namespace arrow {
+
+namespace compute {
+namespace internal {
+
+namespace {
+
+const FunctionDoc dictionary_decode_doc{"Decodes a DictionaryArray to an 
Array",
+                                        "The Function will call cast to really 
decode.",
+                                        {"dictionary_array"}};
+class DictionaryDecodeMetaFunction : public MetaFunction {
+ public:
+  DictionaryDecodeMetaFunction()
+      : MetaFunction("dictionary_decode", Arity::Unary(), 
dictionary_decode_doc) {}
+
+  Result<Datum> ExecuteImpl(const std::vector<Datum>& args,
+                            const FunctionOptions* options,
+                            ExecContext* ctx) const override {
+    if (args[0].type() == nullptr || args[0].type()->id() != Type::DICTIONARY) 
{
+      return Status::TypeError("Expected a DictonaryArray");
+    }

Review Comment:
   Instead of returning an error here, it might be nicer to simply return the 
input unchanged when it is not dictionary-encoded.  The fact that 
`dictionary_encode` returns an error if the input is already a dictionary 
[has been seen as a bad thing](https://github.com/apache/arrow/issues/34890).



##########
cpp/src/arrow/compute/dictionary_decode.cc:
##########
@@ -0,0 +1,95 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/dictionary_decode.h"
+
+#include <mutex>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "arrow/compute/cast.h"
+#include "arrow/compute/cast_internal.h"
+#include "arrow/compute/exec.h"
+#include "arrow/compute/function_internal.h"
+#include "arrow/compute/kernel.h"
+#include "arrow/compute/kernels/codegen_internal.h"
+#include "arrow/compute/registry.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/reflection_internal.h"
+
+namespace arrow {
+
+namespace compute {
+namespace internal {
+
+namespace {
+
+const FunctionDoc dictionary_decode_doc{"Decodes a DictionaryArray to an 
Array",
+                                        "The Function will call cast to really 
decode.",
+                                        {"dictionary_array"}};

Review Comment:
   ```suggestion
   const FunctionDoc dictionary_decode_doc{"Decodes a DictionaryArray to an 
Array",
                                           ("Return a plain-encoded version of 
the array input\n"
                                            "This function does nothing if the 
input is not a dictionary."),
                                           {"dictionary_array"}};
   ```
   The fact that it calls `cast` is an implementation detail that the user 
doesn't really need to know.



##########
cpp/src/arrow/compute/dictionary_decode.cc:
##########
@@ -0,0 +1,95 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/dictionary_decode.h"
+
+#include <mutex>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "arrow/compute/cast.h"
+#include "arrow/compute/cast_internal.h"
+#include "arrow/compute/exec.h"
+#include "arrow/compute/function_internal.h"
+#include "arrow/compute/kernel.h"
+#include "arrow/compute/kernels/codegen_internal.h"
+#include "arrow/compute/registry.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/reflection_internal.h"
+
+namespace arrow {
+
+namespace compute {
+namespace internal {
+
+namespace {
+
+const FunctionDoc dictionary_decode_doc{"Decodes a DictionaryArray to an 
Array",
+                                        "The Function will call cast to really 
decode.",
+                                        {"dictionary_array"}};
+class DictionaryDecodeMetaFunction : public MetaFunction {
+ public:
+  DictionaryDecodeMetaFunction()
+      : MetaFunction("dictionary_decode", Arity::Unary(), 
dictionary_decode_doc) {}
+
+  Result<Datum> ExecuteImpl(const std::vector<Datum>& args,
+                            const FunctionOptions* options,
+                            ExecContext* ctx) const override {
+    if (args[0].type() == nullptr || args[0].type()->id() != Type::DICTIONARY) 
{
+      return Status::TypeError("Expected a DictonaryArray");
+    }
+
+    if (args[0].is_array()) {
+      ARROW_CHECK_NE(args[0].array()->dictionary, nullptr);
+      TypeHolder to_type(args[0].array()->dictionary->type);
+      CastOptions castOption = CastOptions::Safe(to_type);

Review Comment:
   ```suggestion
         CastOptions cast_option = CastOptions::Safe(to_type);
   ```



##########
cpp/src/arrow/compute/dictionary_decode.cc:
##########
@@ -0,0 +1,95 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/dictionary_decode.h"
+
+#include <mutex>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "arrow/compute/cast.h"
+#include "arrow/compute/cast_internal.h"
+#include "arrow/compute/exec.h"
+#include "arrow/compute/function_internal.h"
+#include "arrow/compute/kernel.h"
+#include "arrow/compute/kernels/codegen_internal.h"
+#include "arrow/compute/registry.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/reflection_internal.h"
+
+namespace arrow {
+
+namespace compute {
+namespace internal {
+
+namespace {
+
+const FunctionDoc dictionary_decode_doc{"Decodes a DictionaryArray to an 
Array",
+                                        "The Function will call cast to really 
decode.",
+                                        {"dictionary_array"}};
+class DictionaryDecodeMetaFunction : public MetaFunction {
+ public:
+  DictionaryDecodeMetaFunction()
+      : MetaFunction("dictionary_decode", Arity::Unary(), 
dictionary_decode_doc) {}
+
+  Result<Datum> ExecuteImpl(const std::vector<Datum>& args,
+                            const FunctionOptions* options,
+                            ExecContext* ctx) const override {
+    if (args[0].type() == nullptr || args[0].type()->id() != Type::DICTIONARY) 
{
+      return Status::TypeError("Expected a DictonaryArray");
+    }
+
+    if (args[0].is_array()) {
+      ARROW_CHECK_NE(args[0].array()->dictionary, nullptr);
+      TypeHolder to_type(args[0].array()->dictionary->type);
+      CastOptions castOption = CastOptions::Safe(to_type);
+      return CallFunction("cast", args, &castOption, ctx);
+    } else if (args[0].is_chunked_array()) {
+      ARROW_CHECK_NE(args[0].chunked_array()->chunk(0), nullptr);

Review Comment:
   This will fail for an empty chunked array (i.e. one with zero chunks), since `chunk(0)` does not exist in that case.



##########
cpp/src/arrow/compute/dictionary_decode.cc:
##########
@@ -0,0 +1,95 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/dictionary_decode.h"
+
+#include <mutex>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "arrow/compute/cast.h"
+#include "arrow/compute/cast_internal.h"
+#include "arrow/compute/exec.h"
+#include "arrow/compute/function_internal.h"
+#include "arrow/compute/kernel.h"
+#include "arrow/compute/kernels/codegen_internal.h"
+#include "arrow/compute/registry.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/reflection_internal.h"
+
+namespace arrow {
+
+namespace compute {
+namespace internal {
+
+namespace {
+
+const FunctionDoc dictionary_decode_doc{"Decodes a DictionaryArray to an 
Array",
+                                        "The Function will call cast to really 
decode.",
+                                        {"dictionary_array"}};
+class DictionaryDecodeMetaFunction : public MetaFunction {
+ public:
+  DictionaryDecodeMetaFunction()
+      : MetaFunction("dictionary_decode", Arity::Unary(), 
dictionary_decode_doc) {}
+
+  Result<Datum> ExecuteImpl(const std::vector<Datum>& args,
+                            const FunctionOptions* options,
+                            ExecContext* ctx) const override {
+    if (args[0].type() == nullptr || args[0].type()->id() != Type::DICTIONARY) 
{
+      return Status::TypeError("Expected a DictonaryArray");
+    }
+
+    if (args[0].is_array()) {
+      ARROW_CHECK_NE(args[0].array()->dictionary, nullptr);
+      TypeHolder to_type(args[0].array()->dictionary->type);
+      CastOptions castOption = CastOptions::Safe(to_type);
+      return CallFunction("cast", args, &castOption, ctx);
+    } else if (args[0].is_chunked_array()) {
+      ARROW_CHECK_NE(args[0].chunked_array()->chunk(0), nullptr);
+      ARROW_CHECK_NE(args[0].chunked_array()->chunk(0)->data(), nullptr);
+      ARROW_CHECK_NE(args[0].chunked_array()->chunk(0)->data()->dictionary, 
nullptr);
+      TypeHolder 
to_type(args[0].chunked_array()->chunk(0)->data()->dictionary->type);
+      CastOptions castOption = CastOptions::Safe(to_type);

Review Comment:
   ```suggestion
         CastOptions cast_option = CastOptions::Safe(to_type);
   ```



##########
cpp/src/arrow/compute/dictionary_decode.h:
##########
@@ -0,0 +1,61 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/compute/function.h"
+#include "arrow/compute/type_fwd.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class Array;
+
+namespace compute {
+
+class ExecContext;
+
+// ----------------------------------------------------------------------
+// Convenience invocation APIs for a number of kernels
+
+/// \brief decode a dictionary encoded array to normal array

Review Comment:
   ```suggestion
   /// \brief decode a dictionary encoded array to a normal array
   ```



##########
cpp/src/arrow/compute/dictionary_decode.cc:
##########
@@ -0,0 +1,95 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/dictionary_decode.h"
+
+#include <mutex>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "arrow/compute/cast.h"
+#include "arrow/compute/cast_internal.h"
+#include "arrow/compute/exec.h"
+#include "arrow/compute/function_internal.h"
+#include "arrow/compute/kernel.h"
+#include "arrow/compute/kernels/codegen_internal.h"
+#include "arrow/compute/registry.h"
+#include "arrow/util/logging.h"
+#include "arrow/util/reflection_internal.h"
+
+namespace arrow {
+
+namespace compute {
+namespace internal {
+
+namespace {
+
+const FunctionDoc dictionary_decode_doc{"Decodes a DictionaryArray to an 
Array",
+                                        "The Function will call cast to really 
decode.",
+                                        {"dictionary_array"}};
+class DictionaryDecodeMetaFunction : public MetaFunction {
+ public:
+  DictionaryDecodeMetaFunction()
+      : MetaFunction("dictionary_decode", Arity::Unary(), 
dictionary_decode_doc) {}
+
+  Result<Datum> ExecuteImpl(const std::vector<Datum>& args,
+                            const FunctionOptions* options,
+                            ExecContext* ctx) const override {
+    if (args[0].type() == nullptr || args[0].type()->id() != Type::DICTIONARY) 
{
+      return Status::TypeError("Expected a DictonaryArray");
+    }
+
+    if (args[0].is_array()) {
+      ARROW_CHECK_NE(args[0].array()->dictionary, nullptr);

Review Comment:
   Instead of grabbing the dictionary from the array/chunk, it would probably be 
easier to grab it from the datum's type property.  Then you don't even need the 
if/else branches, I think:
   
   ```
     const auto* dict_type =
         checked_cast<const DictionaryType*>(args[0].type().get());
     CastOptions cast_options = CastOptions::Safe(dict_type->value_type());
   ```



##########
cpp/src/arrow/compute/dictionary_decode.h:
##########
@@ -0,0 +1,61 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/compute/function.h"
+#include "arrow/compute/type_fwd.h"
+#include "arrow/result.h"
+#include "arrow/status.h"
+#include "arrow/type.h"
+#include "arrow/util/macros.h"
+#include "arrow/util/visibility.h"
+
+namespace arrow {
+
+class Array;
+
+namespace compute {
+
+class ExecContext;
+
+// ----------------------------------------------------------------------
+// Convenience invocation APIs for a number of kernels
+
+/// \brief decode a dictionary encoded array to normal array
+/// \param[in] value dictionary array to decode
+/// \param[in] ctx the function execution context, optional
+/// \return the resulting array
+///
+ARROW_EXPORT
+Result<std::shared_ptr<Array>> DictionaryDecode(const Array& value,
+                                                ExecContext* ctx = NULLPTR);
+
+/// \brief decode a dictionary encoded array to normal array

Review Comment:
   ```suggestion
   /// \brief decode a dictionary encoded array to a normal array
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to