rok commented on code in PR #38008:
URL: https://github.com/apache/arrow/pull/38008#discussion_r2848506834


##########
cpp/src/arrow/extension/variable_shape_tensor.h:
##########
@@ -0,0 +1,113 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <optional>
+#include <string>
+#include <vector>
+
+#include "arrow/extension_type.h"
+
+namespace arrow::extension {
+
+class ARROW_EXPORT VariableShapeTensorArray : public ExtensionArray {
+ public:
+  using ExtensionArray::ExtensionArray;  // Inherit all ExtensionArray constructors.
+};
+
+/// \brief Extension type describing variable-shape tensor data.
+/// Canonical Arrow extension type; storage is struct{data: list, shape: fixed_size_list}.
+/// See: https://arrow.apache.org/docs/format/CanonicalExtensions.html
+class ARROW_EXPORT VariableShapeTensorType : public ExtensionType {
+ public:
+  VariableShapeTensorType(const std::shared_ptr<DataType>& value_type, int32_t 
ndim,
+                          std::vector<int64_t> permutation = {},
+                          std::vector<std::string> dim_names = {},
+                          std::vector<std::optional<int64_t>> uniform_shape = 
{})
+      : ExtensionType(struct_({::arrow::field("data", list(value_type)),
+                               ::arrow::field("shape", 
fixed_size_list(int32(), ndim))})),
+        value_type_(value_type),
+        ndim_(ndim),
+        permutation_(std::move(permutation)),  // By-value vectors are moved into members.
+        dim_names_(std::move(dim_names)),
+        uniform_shape_(std::move(uniform_shape)) {}
+
+  std::string extension_name() const override { return 
"arrow.variable_shape_tensor"; }
+  std::string ToString(bool show_metadata = false) const override;
+
+  /// \brief Number of dimensions of each tensor element.
+  int32_t ndim() const { return ndim_; }
+
+  /// \brief Data type of the values stored in the tensor elements.
+  const std::shared_ptr<DataType>& value_type() const { return value_type_; }
+
+  /// Permutation mapping from logical to physical memory layout of tensor 
elements
+  const std::vector<int64_t>& permutation() const { return permutation_; }
+
+  /// \brief Dimension names of tensor elements. Dimensions are ordered physically.
+  const std::vector<std::string>& dim_names() const { return dim_names_; }
+
+  /// Shape of uniform dimensions; presumably nullopt marks a non-uniform one.
+  const std::vector<std::optional<int64_t>>& uniform_shape() const {
+    return uniform_shape_;
+  }
+
+  bool ExtensionEquals(const ExtensionType& other) const override;
+
+  std::string Serialize() const override;
+
+  Result<std::shared_ptr<DataType>> Deserialize(
+      std::shared_ptr<DataType> storage_type,
+      const std::string& serialized_data) const override;
+
+  /// \brief Create a VariableShapeTensorArray from ArrayData.
+  std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const 
override;
+
+  /// \brief Convert an ExtensionScalar to a Tensor
+  ///
+  /// This method will return a Tensor from ExtensionScalar with strides 
derived
+  /// from shape and permutation stored. Shape and dim_names will be permuted
+  /// according to permutation stored in the VariableShapeTensorType.
+  static Result<std::shared_ptr<Tensor>> MakeTensor(
+      const std::shared_ptr<ExtensionScalar>&);
+
+  /// \brief Create a VariableShapeTensorType instance, returned inside a Result.
+  static Result<std::shared_ptr<DataType>> Make(
+      const std::shared_ptr<DataType>& value_type, int32_t ndim,
+      const std::vector<int64_t>& permutation = {},
+      const std::vector<std::string>& dim_names = {},
+      const std::vector<std::optional<int64_t>>& uniform_shape = {});
+
+ private:
+  std::shared_ptr<DataType> value_type_;  // Returned by value_type().
+  int32_t ndim_;  // Returned by ndim(); also the list size of the shape storage field.
+  std::vector<int64_t> permutation_;  // Returned by permutation().
+  std::vector<std::string> dim_names_;  // Returned by dim_names().
+  std::vector<std::optional<int64_t>> uniform_shape_;  // Returned by uniform_shape().
+};
+
+/// \brief Return a VariableShapeTensorType instance as a shared DataType pointer.
+ARROW_EXPORT std::shared_ptr<DataType> variable_shape_tensor(
+    const std::shared_ptr<DataType>& value_type, int32_t ndim,
+    const std::vector<int64_t>& permutation = {},
+    const std::vector<std::string>& dim_names = {},
+    const std::vector<std::optional<int64_t>>& uniform_shape = {});

Review Comment:
   Same as above 
https://github.com/apache/arrow/pull/38008/commits/0de39f23c29d25f3da245ac9e297ff03c7d578c9



##########
cpp/src/arrow/extension/variable_shape_tensor.h:
##########
@@ -0,0 +1,113 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <optional>
+#include <string>
+#include <vector>
+
+#include "arrow/extension_type.h"
+
+namespace arrow::extension {
+
+class ARROW_EXPORT VariableShapeTensorArray : public ExtensionArray {
+ public:
+  using ExtensionArray::ExtensionArray;  // Inherit all ExtensionArray constructors.
+};
+
+/// \brief Concrete type class for variable-shape Tensor data.
+/// This is a canonical arrow extension type.
+/// See: https://arrow.apache.org/docs/format/CanonicalExtensions.html
+class ARROW_EXPORT VariableShapeTensorType : public ExtensionType {
+ public:
+  VariableShapeTensorType(const std::shared_ptr<DataType>& value_type, int32_t 
ndim,
+                          std::vector<int64_t> permutation = {},
+                          std::vector<std::string> dim_names = {},
+                          std::vector<std::optional<int64_t>> uniform_shape = 
{})
+      : ExtensionType(struct_({::arrow::field("data", list(value_type)),
+                               ::arrow::field("shape", 
fixed_size_list(int32(), ndim))})),
+        value_type_(value_type),
+        ndim_(ndim),
+        permutation_(std::move(permutation)),
+        dim_names_(std::move(dim_names)),
+        uniform_shape_(std::move(uniform_shape)) {}
+
+  std::string extension_name() const override { return 
"arrow.variable_shape_tensor"; }
+  std::string ToString(bool show_metadata = false) const override;
+
+  /// Number of dimensions of tensor elements
+  int32_t ndim() const { return ndim_; }
+
+  /// Value type of tensor elements
+  const std::shared_ptr<DataType>& value_type() const { return value_type_; }
+
+  /// Permutation mapping from logical to physical memory layout of tensor 
elements
+  const std::vector<int64_t>& permutation() const { return permutation_; }
+
+  /// Dimension names of tensor elements. Dimensions are ordered physically.
+  const std::vector<std::string>& dim_names() const { return dim_names_; }
+
+  /// Shape of uniform dimensions.
+  const std::vector<std::optional<int64_t>>& uniform_shape() const {
+    return uniform_shape_;
+  }
+
+  bool ExtensionEquals(const ExtensionType& other) const override;
+
+  std::string Serialize() const override;
+
+  Result<std::shared_ptr<DataType>> Deserialize(
+      std::shared_ptr<DataType> storage_type,
+      const std::string& serialized_data) const override;
+
+  /// Create a VariableShapeTensorArray from ArrayData
+  std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const 
override;
+
+  /// \brief Convert an ExtensionScalar to a Tensor
+  ///
+  /// This method will return a Tensor from ExtensionScalar with strides 
derived
+  /// from shape and permutation stored. Shape and dim_names will be permuted
+  /// according to permutation stored in the VariableShapeTensorType.
+  static Result<std::shared_ptr<Tensor>> MakeTensor(
+      const std::shared_ptr<ExtensionScalar>&);
+
+  /// \brief Create a VariableShapeTensorType instance
+  static Result<std::shared_ptr<DataType>> Make(
+      const std::shared_ptr<DataType>& value_type, int32_t ndim,
+      const std::vector<int64_t>& permutation = {},
+      const std::vector<std::string>& dim_names = {},
+      const std::vector<std::optional<int64_t>>& uniform_shape = {});

Review Comment:
   Done, 
https://github.com/apache/arrow/pull/38008/commits/0de39f23c29d25f3da245ac9e297ff03c7d578c9



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to