kszucs commented on a change in pull request #8088: URL: https://github.com/apache/arrow/pull/8088#discussion_r484761256
########## File path: cpp/src/arrow/util/converter.h ########## @@ -0,0 +1,281 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <string> +#include <utility> +#include <vector> + +#include "arrow/array.h" +#include "arrow/builder.h" +#include "arrow/chunked_array.h" +#include "arrow/status.h" +#include "arrow/type.h" +#include "arrow/type_traits.h" +#include "arrow/util/checked_cast.h" + +#include "arrow/visitor_inline.h" + +namespace arrow { + +using internal::checked_cast; +using internal::checked_pointer_cast; + +template <typename Input, typename Options> +class ArrayConverter { + public: + using InputType = Input; + using OptionsType = Options; + + ArrayConverter(const std::shared_ptr<DataType>& type, + std::shared_ptr<ArrayBuilder> builder, Options options) + : sp_type_(type), sp_builder_(builder), options_(options) {} + + virtual ~ArrayConverter() = default; + const std::shared_ptr<ArrayBuilder>& builder() const { return sp_builder_; } + const std::shared_ptr<DataType>& type() const { return sp_type_; } + Options options() const { return options_; } + + virtual Status Init() { return Status::OK(); } + virtual Status Reserve(int64_t additional_capacity) = 0; + virtual Status Append(InputType value) 
= 0; + virtual Status AppendNull() = 0; + virtual Status Extend(Input seq, int64_t size) = 0; + virtual Result<std::shared_ptr<Array>> Finish() = 0; + + protected: + const std::shared_ptr<DataType> sp_type_; + std::shared_ptr<ArrayBuilder> sp_builder_; + Options options_; +}; + +template <typename T, typename BaseConverter, + typename BuilderType = typename TypeTraits<T>::BuilderType> +class TypedArrayConverter : public BaseConverter { + public: + TypedArrayConverter(const std::shared_ptr<DataType>& type, + std::shared_ptr<ArrayBuilder> builder, + typename BaseConverter::OptionsType options) + : BaseConverter(type, builder, options), + type_(checked_cast<const T&>(*type)), + builder_(checked_cast<BuilderType*>(builder.get())) {} + + Status Reserve(int64_t additional_capacity) override { + return this->builder_->Reserve(additional_capacity); + } + + Status AppendNull() override { return this->builder_->AppendNull(); } + + Result<std::shared_ptr<Array>> Finish() override { return builder_->Finish(); }; + + protected: + const T& type_; + BuilderType* builder_; +}; + +template <typename T, typename BaseConverter> +class PrimitiveArrayConverter : public TypedArrayConverter<T, BaseConverter> { + public: + using TypedArrayConverter<T, BaseConverter>::TypedArrayConverter; +}; + +template <typename T, typename BaseConverter> +class DictionaryArrayConverter + : public TypedArrayConverter<DictionaryType, BaseConverter, DictionaryBuilder<T>> { + public: + DictionaryArrayConverter(const std::shared_ptr<DataType>& type, + std::shared_ptr<ArrayBuilder> builder, + typename BaseConverter::OptionsType options) + : TypedArrayConverter<DictionaryType, BaseConverter, DictionaryBuilder<T>>( + type, builder, options), + value_type_(checked_cast<const T&>( + *checked_cast<const DictionaryType&>(*type).value_type())) {} + + protected: + const T& value_type_; +}; + +template <typename T, typename BaseConverter> +class ListArrayConverter : public TypedArrayConverter<T, BaseConverter> { + 
public: + ListArrayConverter(const std::shared_ptr<DataType>& type, + std::shared_ptr<ArrayBuilder> builder, + std::shared_ptr<BaseConverter> value_converter, + typename BaseConverter::OptionsType options) + : TypedArrayConverter<T, BaseConverter>(type, builder, options), + value_converter_(std::move(value_converter)) {} + + protected: + std::shared_ptr<BaseConverter> value_converter_; +}; + +template <typename T, typename BaseConverter> +class StructArrayConverter : public TypedArrayConverter<T, BaseConverter> { + public: + StructArrayConverter(const std::shared_ptr<DataType>& type, + std::shared_ptr<ArrayBuilder> builder, + std::vector<std::shared_ptr<BaseConverter>> child_converters, + typename BaseConverter::OptionsType options) + : TypedArrayConverter<T, BaseConverter>(type, builder, options), + child_converters_(std::move(child_converters)) {} + + protected: + std::vector<std::shared_ptr<BaseConverter>> child_converters_; +}; + +#define DICTIONARY_CASE(TYPE_ENUM, TYPE_CLASS) \ + case Type::TYPE_ENUM: \ + out->reset(new DictionaryConverter<TYPE_CLASS>(type, std::move(builder), options)); \ + break; + +template <typename Options, typename BaseConverter, + template <typename...> class PrimitiveConverter, + template <typename...> class DictionaryConverter, + template <typename...> class ListConverter, + template <typename...> class StructConverter> Review comment: I'd like to keep the construction code somewhere other than `python_to_arrow.cc`. I moved it into the internal namespace so we can change it anytime. I could reduce the four converter kinds to two by specializing TypedConverter for list-like and struct types, but I'm not sure how much it would help. The previous code was using static dispatch extensively, so I tried to keep it. We could certainly simplify the API by using type visitors, but I'm afraid that would cause performance regressions. 
---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org