westonpace commented on code in PR #35356: URL: https://github.com/apache/arrow/pull/35356#discussion_r1260347082
########## cpp/src/arrow/compute/dictionary_decode.cc: ########## @@ -0,0 +1,95 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/compute/dictionary_decode.h" + +#include <mutex> +#include <sstream> +#include <string> +#include <unordered_map> +#include <unordered_set> +#include <utility> +#include <vector> + +#include "arrow/compute/cast.h" +#include "arrow/compute/cast_internal.h" +#include "arrow/compute/exec.h" +#include "arrow/compute/function_internal.h" +#include "arrow/compute/kernel.h" +#include "arrow/compute/kernels/codegen_internal.h" +#include "arrow/compute/registry.h" +#include "arrow/util/logging.h" +#include "arrow/util/reflection_internal.h" + +namespace arrow { + +namespace compute { +namespace internal { + +namespace { + +const FunctionDoc dictionary_decode_doc{"Decodes a DictionaryArray to an Array", + "The Function will call cast to really decode.", + {"dictionary_array"}}; +class DictionaryDecodeMetaFunction : public MetaFunction { + public: + DictionaryDecodeMetaFunction() + : MetaFunction("dictionary_decode", Arity::Unary(), dictionary_decode_doc) {} + + Result<Datum> ExecuteImpl(const std::vector<Datum>& args, + const FunctionOptions* options, + ExecContext* ctx) const 
override { + if (args[0].type() == nullptr || args[0].type()->id() != Type::DICTIONARY) { + return Status::TypeError("Expected a DictonaryArray"); + } Review Comment: Instead of returning an error here it might be nice to just return the input itself. The fact that `dictionary_encode` throws an error if the input is already a dictionary [has been seen as a bad thing](https://github.com/apache/arrow/issues/34890). ########## cpp/src/arrow/compute/dictionary_decode.cc: ########## @@ -0,0 +1,95 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "arrow/compute/dictionary_decode.h" + +#include <mutex> +#include <sstream> +#include <string> +#include <unordered_map> +#include <unordered_set> +#include <utility> +#include <vector> + +#include "arrow/compute/cast.h" +#include "arrow/compute/cast_internal.h" +#include "arrow/compute/exec.h" +#include "arrow/compute/function_internal.h" +#include "arrow/compute/kernel.h" +#include "arrow/compute/kernels/codegen_internal.h" +#include "arrow/compute/registry.h" +#include "arrow/util/logging.h" +#include "arrow/util/reflection_internal.h" + +namespace arrow { + +namespace compute { +namespace internal { + +namespace { + +const FunctionDoc dictionary_decode_doc{"Decodes a DictionaryArray to an Array", + "The Function will call cast to really decode.", + {"dictionary_array"}}; Review Comment: ```suggestion const FunctionDoc dictionary_decode_doc{"Decodes a DictionaryArray to an Array", ("Return a plain-encoded version of the array input\n" "This function does nothing if the input is not a dictionary."), {"dictionary_array"}}; ``` The fact that it calls `cast` is an implementation detail that the user doesn't really need to know. ########## cpp/src/arrow/compute/dictionary_decode.cc: ########## @@ -0,0 +1,95 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/compute/dictionary_decode.h" + +#include <mutex> +#include <sstream> +#include <string> +#include <unordered_map> +#include <unordered_set> +#include <utility> +#include <vector> + +#include "arrow/compute/cast.h" +#include "arrow/compute/cast_internal.h" +#include "arrow/compute/exec.h" +#include "arrow/compute/function_internal.h" +#include "arrow/compute/kernel.h" +#include "arrow/compute/kernels/codegen_internal.h" +#include "arrow/compute/registry.h" +#include "arrow/util/logging.h" +#include "arrow/util/reflection_internal.h" + +namespace arrow { + +namespace compute { +namespace internal { + +namespace { + +const FunctionDoc dictionary_decode_doc{"Decodes a DictionaryArray to an Array", + "The Function will call cast to really decode.", + {"dictionary_array"}}; +class DictionaryDecodeMetaFunction : public MetaFunction { + public: + DictionaryDecodeMetaFunction() + : MetaFunction("dictionary_decode", Arity::Unary(), dictionary_decode_doc) {} + + Result<Datum> ExecuteImpl(const std::vector<Datum>& args, + const FunctionOptions* options, + ExecContext* ctx) const override { + if (args[0].type() == nullptr || args[0].type()->id() != Type::DICTIONARY) { + return Status::TypeError("Expected a DictonaryArray"); + } + + if (args[0].is_array()) { + ARROW_CHECK_NE(args[0].array()->dictionary, nullptr); + TypeHolder to_type(args[0].array()->dictionary->type); + CastOptions castOption = CastOptions::Safe(to_type); Review Comment: ```suggestion CastOptions cast_option = CastOptions::Safe(to_type); ``` ########## cpp/src/arrow/compute/dictionary_decode.cc: ########## @@ -0,0 +1,95 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/compute/dictionary_decode.h" + +#include <mutex> +#include <sstream> +#include <string> +#include <unordered_map> +#include <unordered_set> +#include <utility> +#include <vector> + +#include "arrow/compute/cast.h" +#include "arrow/compute/cast_internal.h" +#include "arrow/compute/exec.h" +#include "arrow/compute/function_internal.h" +#include "arrow/compute/kernel.h" +#include "arrow/compute/kernels/codegen_internal.h" +#include "arrow/compute/registry.h" +#include "arrow/util/logging.h" +#include "arrow/util/reflection_internal.h" + +namespace arrow { + +namespace compute { +namespace internal { + +namespace { + +const FunctionDoc dictionary_decode_doc{"Decodes a DictionaryArray to an Array", + "The Function will call cast to really decode.", + {"dictionary_array"}}; +class DictionaryDecodeMetaFunction : public MetaFunction { + public: + DictionaryDecodeMetaFunction() + : MetaFunction("dictionary_decode", Arity::Unary(), dictionary_decode_doc) {} + + Result<Datum> ExecuteImpl(const std::vector<Datum>& args, + const FunctionOptions* options, + ExecContext* ctx) const override { + if (args[0].type() == nullptr || args[0].type()->id() != Type::DICTIONARY) { + return Status::TypeError("Expected a DictonaryArray"); + } + + if (args[0].is_array()) { + ARROW_CHECK_NE(args[0].array()->dictionary, nullptr); + TypeHolder to_type(args[0].array()->dictionary->type); + 
CastOptions castOption = CastOptions::Safe(to_type); + return CallFunction("cast", args, &castOption, ctx); + } else if (args[0].is_chunked_array()) { + ARROW_CHECK_NE(args[0].chunked_array()->chunk(0), nullptr); Review Comment: This will fail for an empty chunked array (i.e. one with zero chunks), because there is no `chunk(0)` to read. ########## cpp/src/arrow/compute/dictionary_decode.cc: ########## @@ -0,0 +1,95 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "arrow/compute/dictionary_decode.h" + +#include <mutex> +#include <sstream> +#include <string> +#include <unordered_map> +#include <unordered_set> +#include <utility> +#include <vector> + +#include "arrow/compute/cast.h" +#include "arrow/compute/cast_internal.h" +#include "arrow/compute/exec.h" +#include "arrow/compute/function_internal.h" +#include "arrow/compute/kernel.h" +#include "arrow/compute/kernels/codegen_internal.h" +#include "arrow/compute/registry.h" +#include "arrow/util/logging.h" +#include "arrow/util/reflection_internal.h" + +namespace arrow { + +namespace compute { +namespace internal { + +namespace { + +const FunctionDoc dictionary_decode_doc{"Decodes a DictionaryArray to an Array", + "The Function will call cast to really decode.", + {"dictionary_array"}}; +class DictionaryDecodeMetaFunction : public MetaFunction { + public: + DictionaryDecodeMetaFunction() + : MetaFunction("dictionary_decode", Arity::Unary(), dictionary_decode_doc) {} + + Result<Datum> ExecuteImpl(const std::vector<Datum>& args, + const FunctionOptions* options, + ExecContext* ctx) const override { + if (args[0].type() == nullptr || args[0].type()->id() != Type::DICTIONARY) { + return Status::TypeError("Expected a DictonaryArray"); + } + + if (args[0].is_array()) { + ARROW_CHECK_NE(args[0].array()->dictionary, nullptr); + TypeHolder to_type(args[0].array()->dictionary->type); + CastOptions castOption = CastOptions::Safe(to_type); + return CallFunction("cast", args, &castOption, ctx); + } else if (args[0].is_chunked_array()) { + ARROW_CHECK_NE(args[0].chunked_array()->chunk(0), nullptr); + ARROW_CHECK_NE(args[0].chunked_array()->chunk(0)->data(), nullptr); + ARROW_CHECK_NE(args[0].chunked_array()->chunk(0)->data()->dictionary, nullptr); + TypeHolder to_type(args[0].chunked_array()->chunk(0)->data()->dictionary->type); + CastOptions castOption = CastOptions::Safe(to_type); Review Comment: ```suggestion CastOptions cast_option = CastOptions::Safe(to_type); ``` 
########## cpp/src/arrow/compute/dictionary_decode.h: ########## @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <memory> +#include <string> +#include <vector> + +#include "arrow/compute/function.h" +#include "arrow/compute/type_fwd.h" +#include "arrow/result.h" +#include "arrow/status.h" +#include "arrow/type.h" +#include "arrow/util/macros.h" +#include "arrow/util/visibility.h" + +namespace arrow { + +class Array; + +namespace compute { + +class ExecContext; + +// ---------------------------------------------------------------------- +// Convenience invocation APIs for a number of kernels + +/// \brief decode a dictionary encoded array to normal array Review Comment: ```suggestion /// \brief decode a dictionary encoded array to a normal array ``` ########## cpp/src/arrow/compute/dictionary_decode.cc: ########## @@ -0,0 +1,95 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/compute/dictionary_decode.h" + +#include <mutex> +#include <sstream> +#include <string> +#include <unordered_map> +#include <unordered_set> +#include <utility> +#include <vector> + +#include "arrow/compute/cast.h" +#include "arrow/compute/cast_internal.h" +#include "arrow/compute/exec.h" +#include "arrow/compute/function_internal.h" +#include "arrow/compute/kernel.h" +#include "arrow/compute/kernels/codegen_internal.h" +#include "arrow/compute/registry.h" +#include "arrow/util/logging.h" +#include "arrow/util/reflection_internal.h" + +namespace arrow { + +namespace compute { +namespace internal { + +namespace { + +const FunctionDoc dictionary_decode_doc{"Decodes a DictionaryArray to an Array", + "The Function will call cast to really decode.", + {"dictionary_array"}}; +class DictionaryDecodeMetaFunction : public MetaFunction { + public: + DictionaryDecodeMetaFunction() + : MetaFunction("dictionary_decode", Arity::Unary(), dictionary_decode_doc) {} + + Result<Datum> ExecuteImpl(const std::vector<Datum>& args, + const FunctionOptions* options, + ExecContext* ctx) const override { + if (args[0].type() == nullptr || args[0].type()->id() != Type::DICTIONARY) { + return Status::TypeError("Expected a DictonaryArray"); + } + + if (args[0].is_array()) { + ARROW_CHECK_NE(args[0].array()->dictionary, nullptr); Review Comment: Instead of grabbing the dictionary from the 
array/chunk it would probably be easier to grab it from the datum's type property. Then you don't even need the if/else branches, I think: ``` DictionaryType* dict_type = checked_cast<DictionaryType*>(args[0].type().get()); CastOptions cast_options = CastOptions::Safe(dict_type->value_type()); ``` ########## cpp/src/arrow/compute/dictionary_decode.h: ########## @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include <memory> +#include <string> +#include <vector> + +#include "arrow/compute/function.h" +#include "arrow/compute/type_fwd.h" +#include "arrow/result.h" +#include "arrow/status.h" +#include "arrow/type.h" +#include "arrow/util/macros.h" +#include "arrow/util/visibility.h" + +namespace arrow { + +class Array; + +namespace compute { + +class ExecContext; + +// ---------------------------------------------------------------------- +// Convenience invocation APIs for a number of kernels + +/// \brief decode a dictionary encoded array to normal array +/// \param[in] value dictionary array to decode +/// \param[in] ctx the function execution context, optional +/// \return the resulting array +/// +ARROW_EXPORT +Result<std::shared_ptr<Array>> DictionaryDecode(const Array& value, + ExecContext* ctx = NULLPTR); + +/// \brief decode a dictionary encoded array to normal array Review Comment: ```suggestion /// \brief decode a dictionary encoded array to a normal array ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
