pitrou commented on a change in pull request #7378:
URL: https://github.com/apache/arrow/pull/7378#discussion_r438027429



##########
File path: cpp/src/arrow/array/array_nested.h
##########
@@ -396,70 +445,43 @@ class ARROW_EXPORT UnionArray : public Array {
   /// \param[in] children Vector of children Arrays containing the data for 
each type.
   /// \param[in] field_names Vector of strings containing the name of each 
field.
   /// \param[in] type_codes Vector of type codes.
-  static Result<std::shared_ptr<Array>> MakeSparse(
-      const Array& type_ids, const std::vector<std::shared_ptr<Array>>& 
children,
-      const std::vector<std::string>& field_names = {},
-      const std::vector<type_code_t>& type_codes = {});
+  static Result<std::shared_ptr<Array>> Make(const Array& type_ids,
+                                             const Array& value_offsets,
+                                             ArrayVector children,
+                                             std::vector<std::string> 
field_names = {},
+                                             std::vector<type_code_t> 
type_codes = {});
 
-  /// \brief Construct Sparse UnionArray from type_ids and children
+  /// \brief Construct Dense UnionArray from type_ids and children

Review comment:
       "DenseUnionArray"

##########
File path: cpp/src/arrow/array/array_nested.h
##########
@@ -337,57 +338,105 @@ class ARROW_EXPORT StructArray : public Array {
 /// Concrete Array class for union data
 class ARROW_EXPORT UnionArray : public Array {
  public:
-  using TypeClass = UnionType;
-
   using type_code_t = int8_t;
 
-  explicit UnionArray(const std::shared_ptr<ArrayData>& data);
+  /// Note that this buffer does not account for any slice offset
+  std::shared_ptr<Buffer> type_codes() const { return data_->buffers[1]; }
+
+  const type_code_t* raw_type_codes() const { return raw_type_codes_ + 
data_->offset; }
+
+  /// The physical child id containing value at index.
+  int child_id(int64_t i) const {
+    return union_type_->child_ids()[raw_type_codes_[i + data_->offset]];
+  }
+
+  const UnionType* union_type() const { return union_type_; }
+
+  UnionMode::type mode() const { return union_type_->mode(); }
+
+  // Return the given field as an individual array.
+  // For sparse unions, the returned array has its offset, length and null
+  // count adjusted.
+  ARROW_DEPRECATED("Deprecated in 1.0.0. Use field(pos)")
+  std::shared_ptr<Array> child(int pos) const;
+
+  /// \brief Return the given field as an individual array.
+  ///
+  /// For sparse unions, the returned array has its offset, length and null
+  /// count adjusted.
+  std::shared_ptr<Array> field(int pos) const;
+
+ protected:
+  void SetData(std::shared_ptr<ArrayData> data);
+
+  const type_code_t* raw_type_codes_;
+  const UnionType* union_type_;
+
+  // For caching boxed child data
+  mutable std::vector<std::shared_ptr<Array>> boxed_fields_;
+};
+
+/// Concrete Array class for union data

Review comment:
       Need to update docstring.

##########
File path: cpp/src/arrow/array/array_nested.h
##########
@@ -337,57 +338,105 @@ class ARROW_EXPORT StructArray : public Array {
 /// Concrete Array class for union data
 class ARROW_EXPORT UnionArray : public Array {
  public:
-  using TypeClass = UnionType;
-
   using type_code_t = int8_t;
 
-  explicit UnionArray(const std::shared_ptr<ArrayData>& data);
+  /// Note that this buffer does not account for any slice offset
+  std::shared_ptr<Buffer> type_codes() const { return data_->buffers[1]; }
+
+  const type_code_t* raw_type_codes() const { return raw_type_codes_ + 
data_->offset; }
+
+  /// The physical child id containing value at index.
+  int child_id(int64_t i) const {
+    return union_type_->child_ids()[raw_type_codes_[i + data_->offset]];
+  }
+
+  const UnionType* union_type() const { return union_type_; }
+
+  UnionMode::type mode() const { return union_type_->mode(); }
+
+  // Return the given field as an individual array.
+  // For sparse unions, the returned array has its offset, length and null
+  // count adjusted.
+  ARROW_DEPRECATED("Deprecated in 1.0.0. Use field(pos)")
+  std::shared_ptr<Array> child(int pos) const;
+
+  /// \brief Return the given field as an individual array.
+  ///
+  /// For sparse unions, the returned array has its offset, length and null
+  /// count adjusted.
+  std::shared_ptr<Array> field(int pos) const;
+
+ protected:
+  void SetData(std::shared_ptr<ArrayData> data);
+
+  const type_code_t* raw_type_codes_;
+  const UnionType* union_type_;
+
+  // For caching boxed child data
+  mutable std::vector<std::shared_ptr<Array>> boxed_fields_;
+};
+
+/// Concrete Array class for union data
+class ARROW_EXPORT SparseUnionArray : public UnionArray {
+ public:
+  using TypeClass = SparseUnionType;
+
+  explicit SparseUnionArray(std::shared_ptr<ArrayData> data);
 
-  UnionArray(const std::shared_ptr<DataType>& type, int64_t length,
-             const std::vector<std::shared_ptr<Array>>& children,
-             const std::shared_ptr<Buffer>& type_ids,
-             const std::shared_ptr<Buffer>& value_offsets = NULLPTR,
-             const std::shared_ptr<Buffer>& null_bitmap = NULLPTR,
-             int64_t null_count = kUnknownNullCount, int64_t offset = 0);
+  SparseUnionArray(std::shared_ptr<DataType> type, int64_t length, ArrayVector 
children,
+                   std::shared_ptr<Buffer> type_ids,
+                   std::shared_ptr<Buffer> null_bitmap = NULLPTR,
+                   int64_t null_count = kUnknownNullCount, int64_t offset = 0);
 
-  /// \brief Construct Dense UnionArray from types_ids, value_offsets and 
children
+  /// \brief Construct SparseUnionArray from type_ids and children
   ///
   /// This function does the bare minimum of validation of the offsets and
-  /// input types. The value_offsets are assumed to be well-formed.
+  /// input types.
   ///
   /// \param[in] type_ids An array of logical type ids for the union type
-  /// \param[in] value_offsets An array of signed int32 values indicating the
-  /// relative offset into the respective child array for the type in a given 
slot.
-  /// The respective offsets for each child value array must be in order / 
increasing.
   /// \param[in] children Vector of children Arrays containing the data for 
each type.
   /// \param[in] field_names Vector of strings containing the name of each 
field.
   /// \param[in] type_codes Vector of type codes.
-  static Result<std::shared_ptr<Array>> MakeDense(
-      const Array& type_ids, const Array& value_offsets,
-      const std::vector<std::shared_ptr<Array>>& children,
-      const std::vector<std::string>& field_names = {},
-      const std::vector<type_code_t>& type_codes = {});
+  static Result<std::shared_ptr<Array>> Make(const Array& type_ids, 
ArrayVector children,
+                                             std::vector<std::string> 
field_names = {},
+                                             std::vector<type_code_t> 
type_codes = {});
 
-  /// \brief Construct Dense UnionArray from types_ids, value_offsets and 
children
+  /// \brief Construct Sparse UnionArray from type_ids and children
   ///
   /// This function does the bare minimum of validation of the offsets and
-  /// input types. The value_offsets are assumed to be well-formed.
+  /// input types.
   ///
   /// \param[in] type_ids An array of logical type ids for the union type
-  /// \param[in] value_offsets An array of signed int32 values indicating the
-  /// relative offset into the respective child array for the type in a given 
slot.
-  /// The respective offsets for each child value array must be in order / 
increasing.
   /// \param[in] children Vector of children Arrays containing the data for 
each type.
   /// \param[in] type_codes Vector of type codes.
-  static Result<std::shared_ptr<Array>> MakeDense(
-      const Array& type_ids, const Array& value_offsets,
-      const std::vector<std::shared_ptr<Array>>& children,
-      const std::vector<type_code_t>& type_codes) {
-    return MakeDense(type_ids, value_offsets, children, 
std::vector<std::string>{},
-                     type_codes);
+  static Result<std::shared_ptr<Array>> Make(const Array& type_ids, 
ArrayVector children,
+                                             std::vector<type_code_t> 
type_codes) {
+    return Make(std::move(type_ids), std::move(children), 
std::vector<std::string>{},
+                std::move(type_codes));
   }
 
-  /// \brief Construct Sparse UnionArray from type_ids and children
+  const SparseUnionType* union_type() const {
+    return internal::checked_cast<const SparseUnionType*>(union_type_);
+  }
+
+ protected:
+  void SetData(std::shared_ptr<ArrayData> data);
+};
+
+/// Concrete Array class for union data

Review comment:
       Need to update docstring.

##########
File path: cpp/src/arrow/array/array_primitive.h
##########
@@ -108,11 +108,11 @@ class ARROW_EXPORT DayTimeIntervalArray : public 
PrimitiveArray {
   // For compatibility with Take kernel.
   TypeClass::DayMilliseconds GetView(int64_t i) const { return GetValue(i); }
 
-  int32_t byte_width() const { return sizeof(TypeClass::DayMilliseconds); }
-
   const uint8_t* raw_values() const { return raw_values_ + data_->offset * 
byte_width(); }
 
  protected:
+  static constexpr int32_t byte_width() { return 
sizeof(TypeClass::DayMilliseconds); }

Review comment:
       Not sure why you made this protected? The byte width may be useful.

##########
File path: cpp/src/arrow/type_fwd.h
##########
@@ -351,39 +358,21 @@ std::shared_ptr<DataType> ARROW_EXPORT 
time64(TimeUnit::type unit);
 std::shared_ptr<DataType> ARROW_EXPORT
 struct_(const std::vector<std::shared_ptr<Field>>& fields);
 
-/// \brief Create a UnionType instance
-std::shared_ptr<DataType> ARROW_EXPORT
-union_(const std::vector<std::shared_ptr<Field>>& child_fields,

Review comment:
       I think we should keep this factory function, for compatibility and for 
convenience. `UnionMode` still exists, right?

##########
File path: cpp/src/arrow/type.h
##########
@@ -1015,25 +1018,12 @@ class ARROW_EXPORT Decimal128Type : public DecimalType {
 /// \brief Concrete type class for union data
 class ARROW_EXPORT UnionType : public NestedType {
  public:
-  static constexpr Type::type type_id = Type::UNION;
   static constexpr int8_t kMaxTypeCode = 127;
   static constexpr int kInvalidChildId = -1;
 
-  static constexpr const char* type_name() { return "union"; }
-
-  UnionType(const std::vector<std::shared_ptr<Field>>& fields,

Review comment:
       I'm not sure it's a good idea to remove these constructors, while 
external code may rely on them.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to