pitrou commented on code in PR #37533:
URL: https://github.com/apache/arrow/pull/37533#discussion_r1434181897


##########
cpp/src/arrow/extension/fixed_shape_tensor.cc:
##########
@@ -203,6 +205,46 @@ std::shared_ptr<Array> FixedShapeTensorType::MakeArray(
   return std::make_shared<ExtensionArray>(data);
 }
 
+const Result<std::shared_ptr<Tensor>> FixedShapeTensorType::GetTensor(
+    const std::shared_ptr<ExtensionScalar>& scalar) const {
+  if (!is_fixed_width(*this->value_type())) {
+    return Status::Invalid("Cannot convert non-fixed-width values to Tensor.");
+  }
+  const auto array =
+      internal::checked_pointer_cast<const 
FixedSizeListScalar>(scalar->value)->value;
+  if (array->null_count() > 0) {
+    return Status::Invalid("Cannot convert data with nulls values to Tensor.");

Review Comment:
   ```suggestion
       return Status::Invalid("Cannot convert data with nulls to Tensor.");
   ```



##########
cpp/src/arrow/extension/fixed_shape_tensor.h:
##########
@@ -94,6 +94,15 @@ class ARROW_EXPORT FixedShapeTensorType : public 
ExtensionType {
   /// Create a FixedShapeTensorArray from ArrayData
   std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const 
override;
 
+  /// \brief Create a Tensor from a Scalar value from a FixedShapeTensorArray
+  ///
+  /// This method will return a Tensor from FixedShapeTensorArray with strides
+  /// derived from shape and permutation of FixedShapeTensorType. Shape and
+  /// dim_names will be permuted according to permutation stored in the
+  /// FixedShapeTensorType metadata.
+  const Result<std::shared_ptr<Tensor>> GetTensor(
+      const std::shared_ptr<ExtensionScalar>& scalar) const;

Review Comment:
   1. call this MakeTensor?
   2. this can be a static method
   3. `const` on the return type doesn't make sense
   ```suggestion
     static Result<std::shared_ptr<Tensor>> MakeTensor(
         const std::shared_ptr<ExtensionScalar>& scalar);
   ```



##########
cpp/src/arrow/extension/fixed_shape_tensor.cc:
##########
@@ -293,40 +335,49 @@ const Result<std::shared_ptr<Tensor>> 
FixedShapeTensorArray::ToTensor() const {
   // To convert an array of n dimensional tensors to a n+1 dimensional tensor 
we
   // interpret the array's length as the first dimension the new tensor.
 
-  auto ext_arr = std::static_pointer_cast<FixedSizeListArray>(this->storage());
-  auto ext_type = 
internal::checked_pointer_cast<FixedShapeTensorType>(this->type());
-  ARROW_RETURN_IF(!is_fixed_width(*ext_arr->value_type()),
-                  Status::Invalid(ext_arr->value_type()->ToString(),
-                                  " is not valid data type for a tensor"));
-  auto permutation = ext_type->permutation();
+  const auto ext_type =
+      internal::checked_pointer_cast<FixedShapeTensorType>(this->type());
+  const auto value_type = ext_type->value_type();
+  ARROW_RETURN_IF(
+      !is_fixed_width(*value_type),
+      Status::Invalid(value_type->ToString(), " is not valid data type for a 
tensor"));
 
-  std::vector<std::string> dim_names;
-  if (!ext_type->dim_names().empty()) {
-    for (auto i : permutation) {
-      dim_names.emplace_back(ext_type->dim_names()[i]);
+  std::vector<int64_t> permutation = ext_type->permutation();
+  if (permutation.empty()) {
+    for (int64_t i = 0; i < static_cast<int64_t>(ext_type->ndim()); i++) {
+      permutation.emplace_back(i);
     }
-    dim_names.insert(dim_names.begin(), 1, "");
-  } else {
-    dim_names = {};
   }
+  for (int64_t i = 0; i < static_cast<int64_t>(ext_type->ndim()); i++) {
+    permutation[i] += 1;

Review Comment:
   What? Why?



##########
cpp/src/arrow/extension/fixed_shape_tensor_test.cc:
##########
@@ -321,6 +322,70 @@ TEST_F(TestExtensionType, TestFromTensorType) {
   }
 }
 
+void CheckToTensor(const std::vector<int64_t> values, const int32_t cell_size,

Review Comment:
   Please pass vectors as const-ref.



##########
python/pyarrow/array.pxi:
##########
@@ -3519,17 +3519,42 @@ class FixedShapeTensorArray(ExtensionArray):
 
     def to_numpy_ndarray(self):

Review Comment:
   Why isn't it called `to_numpy` and `from_numpy` btw?



##########
python/pyarrow/scalar.pxi:
##########
@@ -1027,6 +1027,32 @@ cdef class ExtensionScalar(Scalar):
         return pyarrow_wrap_scalar(<shared_ptr[CScalar]> sp_scalar)
 
 
+cdef class FixedShapeTensorScalar(ExtensionScalar):
+    """
+    Concrete class for fixed shape tensor extension scalar.
+    """
+
+    def to_numpy_ndarray(self):
+        """
+        Convert fixed shape tensor extension scalar to a numpy.ndarray with 
zero copy.
+        """
+        return self.to_tensor().to_numpy()
+
+    def to_tensor(self):
+        """
+        Convert fixed shape tensor extension scalar to a pyarrow.Tensor.

Review Comment:
   Not done.



##########
cpp/src/arrow/extension/fixed_shape_tensor.cc:
##########
@@ -293,40 +335,49 @@ const Result<std::shared_ptr<Tensor>> 
FixedShapeTensorArray::ToTensor() const {
   // To convert an array of n dimensional tensors to a n+1 dimensional tensor 
we
   // interpret the array's length as the first dimension the new tensor.
 
-  auto ext_arr = std::static_pointer_cast<FixedSizeListArray>(this->storage());
-  auto ext_type = 
internal::checked_pointer_cast<FixedShapeTensorType>(this->type());
-  ARROW_RETURN_IF(!is_fixed_width(*ext_arr->value_type()),
-                  Status::Invalid(ext_arr->value_type()->ToString(),
-                                  " is not valid data type for a tensor"));
-  auto permutation = ext_type->permutation();
+  const auto ext_type =
+      internal::checked_pointer_cast<FixedShapeTensorType>(this->type());
+  const auto value_type = ext_type->value_type();
+  ARROW_RETURN_IF(
+      !is_fixed_width(*value_type),
+      Status::Invalid(value_type->ToString(), " is not valid data type for a 
tensor"));
 
-  std::vector<std::string> dim_names;
-  if (!ext_type->dim_names().empty()) {
-    for (auto i : permutation) {
-      dim_names.emplace_back(ext_type->dim_names()[i]);
+  std::vector<int64_t> permutation = ext_type->permutation();
+  if (permutation.empty()) {
+    for (int64_t i = 0; i < static_cast<int64_t>(ext_type->ndim()); i++) {
+      permutation.emplace_back(i);
     }
-    dim_names.insert(dim_names.begin(), 1, "");
-  } else {
-    dim_names = {};
   }
+  for (int64_t i = 0; i < static_cast<int64_t>(ext_type->ndim()); i++) {
+    permutation[i] += 1;
+  }
+  permutation.insert(permutation.begin(), 1, 0);
 
-  std::vector<int64_t> shape;
-  for (int64_t& i : permutation) {
-    shape.emplace_back(ext_type->shape()[i]);
-    ++i;
+  std::vector<std::string> dim_names = ext_type->dim_names();
+  if (!dim_names.empty()) {
+    dim_names.insert(dim_names.begin(), 1, "");
+    internal::Permute<std::string>(permutation, &dim_names);
   }
+
+  std::vector<int64_t> shape = ext_type->shape();
+  auto cell_size = std::accumulate(shape.begin(), shape.end(), 
static_cast<int64_t>(1),
+                                   std::multiplies<>());
   shape.insert(shape.begin(), 1, this->length());
-  permutation.insert(permutation.begin(), 1, 0);
+  internal::Permute<int64_t>(permutation, &shape);
 
   std::vector<int64_t> tensor_strides;
-  auto value_type = 
internal::checked_pointer_cast<FixedWidthType>(ext_arr->value_type());
+  const auto fw_value_type = 
internal::checked_pointer_cast<FixedWidthType>(value_type);
   ARROW_RETURN_NOT_OK(
-      ComputeStrides(*value_type.get(), shape, permutation, &tensor_strides));
-  ARROW_ASSIGN_OR_RAISE(auto buffers, ext_arr->Flatten());
+      ComputeStrides(*fw_value_type.get(), shape, permutation, 
&tensor_strides));
+
   ARROW_ASSIGN_OR_RAISE(
-      auto tensor, Tensor::Make(ext_arr->value_type(), 
buffers->data()->buffers[1], shape,
-                                tensor_strides, dim_names));
-  return tensor;
+      const auto flattened_storage_array,
+      
internal::checked_pointer_cast<FixedSizeListArray>(this->storage())->Flatten());

Review Comment:
   Why do you need to call `Flatten`? You can just use the list array's child 
array...



##########
python/pyarrow/array.pxi:
##########
@@ -3519,17 +3519,42 @@ class FixedShapeTensorArray(ExtensionArray):
 
     def to_numpy_ndarray(self):
         """
-        Convert fixed shape tensor extension array to a numpy array (with 
dim+1).
+        Convert fixed shape tensor extension array to a numpy.ndarray with 
zero copy.
+        First dimension of ndarray will be the length of the fixed shape 
tensor array
+        and the rest of the dimensions will match the permuted shape of the 
fixed
+        shape tensor.

Review Comment:
   ```suggestion
           The resulting ndarray will have (ndim + 1) dimensions.
           The size of the first dimension will be the length of the fixed 
shape tensor array
           and the rest of the dimensions will match the permuted shape of the 
fixed
           shape tensor.
           
           The conversion is zero-copy.
   ```



##########
cpp/src/arrow/extension/fixed_shape_tensor.cc:
##########
@@ -203,6 +205,46 @@ std::shared_ptr<Array> FixedShapeTensorType::MakeArray(
   return std::make_shared<ExtensionArray>(data);
 }
 
+const Result<std::shared_ptr<Tensor>> FixedShapeTensorType::GetTensor(
+    const std::shared_ptr<ExtensionScalar>& scalar) const {
+  if (!is_fixed_width(*this->value_type())) {
+    return Status::Invalid("Cannot convert non-fixed-width values to Tensor.");
+  }
+  const auto array =
+      internal::checked_pointer_cast<const 
FixedSizeListScalar>(scalar->value)->value;
+  if (array->null_count() > 0) {
+    return Status::Invalid("Cannot convert data with nulls values to Tensor.");
+  }
+  const auto value_type =
+      internal::checked_pointer_cast<FixedWidthType>(this->value_type());
+  const auto byte_width = value_type->byte_width();
+
+  std::vector<int64_t> permutation = this->permutation();
+  if (permutation.empty()) {
+    for (int64_t j = 0; j < static_cast<int64_t>(this->ndim()); ++j) {
+      permutation.emplace_back(j);
+    }

Review Comment:
   ```suggestion
       permutation.resize(ndim);
       std::iota(permutation.begin(), permutation.end(), 0);
   ```



##########
cpp/src/arrow/extension/fixed_shape_tensor_test.cc:
##########
@@ -321,6 +322,70 @@ TEST_F(TestExtensionType, TestFromTensorType) {
   }
 }
 
+void CheckToTensor(const std::vector<int64_t> values, const int32_t cell_size,
+                   const std::vector<int64_t> cell_shape,
+                   const std::vector<int64_t> cell_permutation,
+                   const std::vector<std::string> cell_dim_names,
+                   const std::vector<int64_t> tensor_shape,
+                   const std::vector<std::string> tensor_dim_names,
+                   const std::vector<int64_t> tensor_strides) {
+  auto buffer = Buffer::Wrap(values);
+  const std::shared_ptr<DataType> cell_type = fixed_size_list(int64(), 
cell_size);
+  std::vector<std::shared_ptr<Buffer>> buffers = {nullptr, buffer};
+  auto arr_data = std::make_shared<ArrayData>(int64(), values.size(), buffers);
+  auto arr = std::make_shared<Int64Array>(arr_data);
+  ASSERT_OK_AND_ASSIGN(auto fsla_arr, FixedSizeListArray::FromArrays(arr, 
cell_type));
+
+  ASSERT_OK_AND_ASSIGN(
+      auto expected_tensor,
+      Tensor::Make(int64(), buffer, tensor_shape, tensor_strides, 
tensor_dim_names));
+  const auto ext_type =
+      fixed_shape_tensor(int64(), cell_shape, cell_permutation, 
cell_dim_names);
+
+  auto ext_arr = ExtensionType::WrapArray(ext_type, fsla_arr);
+  const auto tensor_array = 
std::static_pointer_cast<FixedShapeTensorArray>(ext_arr);
+  ASSERT_OK_AND_ASSIGN(const auto actual_tensor, tensor_array->ToTensor());
+
+  ASSERT_EQ(actual_tensor->type(), expected_tensor->type());
+  ASSERT_EQ(actual_tensor->shape(), expected_tensor->shape());
+  ASSERT_EQ(actual_tensor->strides(), expected_tensor->strides());
+  ASSERT_EQ(actual_tensor->dim_names(), expected_tensor->dim_names());
+  ASSERT_TRUE(actual_tensor->data()->Equals(*expected_tensor->data()));
+  ASSERT_TRUE(actual_tensor->Equals(*expected_tensor));
+}
+
+TEST_F(TestExtensionType, ToTensor) {
+  auto cell_sizes = std::vector<int32_t>{12, 12, 12, 12, 6, 6, 18, 18, 18, 18};
+
+  auto cell_shapes =
+      std::vector<std::vector<int64_t>>{{3, 4}, {4, 3}, {4, 3}, {3, 4},    {2, 
3},
+                                        {3, 2}, {3, 6}, {6, 3}, {3, 2, 3}, {3, 
2, 3}};
+  auto tensor_shapes = std::vector<std::vector<int64_t>>{
+      {3, 3, 4}, {3, 3, 4}, {3, 4, 3}, {3, 4, 3},    {6, 2, 3},
+      {6, 2, 3}, {2, 3, 6}, {2, 3, 6}, {2, 3, 2, 3}, {2, 3, 2, 3}};
+
+  auto cell_permutations =
+      std::vector<std::vector<int64_t>>{{0, 1}, {1, 0}, {0, 1}, {1, 0},    {0, 
1},
+                                        {1, 0}, {0, 1}, {1, 0}, {0, 1, 2}, {2, 
1, 0}};
+  auto tensor_strides = std::vector<std::vector<int64_t>>{
+      {96, 32, 8}, {96, 8, 24},  {96, 24, 8},  {96, 8, 32},      {48, 24, 8},
+      {48, 8, 16}, {144, 48, 8}, {144, 8, 24}, {144, 48, 24, 8}, {144, 8, 24, 
48}};

Review Comment:
   Is it useful to check so many variations? There's not a lot of structure 
here, and it's not obvious you're trying to exercise something specific.



##########
cpp/src/arrow/extension/fixed_shape_tensor.cc:
##########
@@ -293,40 +335,49 @@ const Result<std::shared_ptr<Tensor>> 
FixedShapeTensorArray::ToTensor() const {
   // To convert an array of n dimensional tensors to a n+1 dimensional tensor 
we
   // interpret the array's length as the first dimension the new tensor.
 
-  auto ext_arr = std::static_pointer_cast<FixedSizeListArray>(this->storage());
-  auto ext_type = 
internal::checked_pointer_cast<FixedShapeTensorType>(this->type());
-  ARROW_RETURN_IF(!is_fixed_width(*ext_arr->value_type()),
-                  Status::Invalid(ext_arr->value_type()->ToString(),
-                                  " is not valid data type for a tensor"));
-  auto permutation = ext_type->permutation();
+  const auto ext_type =
+      internal::checked_pointer_cast<FixedShapeTensorType>(this->type());
+  const auto value_type = ext_type->value_type();
+  ARROW_RETURN_IF(
+      !is_fixed_width(*value_type),
+      Status::Invalid(value_type->ToString(), " is not valid data type for a 
tensor"));

Review Comment:
   ```suggestion
         Status::TypeError(value_type->ToString(), " is not valid data type for 
a tensor"));
   ```



##########
python/pyarrow/scalar.pxi:
##########
@@ -1027,6 +1027,32 @@ cdef class ExtensionScalar(Scalar):
         return pyarrow_wrap_scalar(<shared_ptr[CScalar]> sp_scalar)
 
 
+cdef class FixedShapeTensorScalar(ExtensionScalar):
+    """
+    Concrete class for fixed shape tensor extension scalar.
+    """
+
+    def to_numpy_ndarray(self):

Review Comment:
   Why not `to_numpy`?



##########
cpp/src/arrow/extension/fixed_shape_tensor.cc:
##########
@@ -203,6 +205,46 @@ std::shared_ptr<Array> FixedShapeTensorType::MakeArray(
   return std::make_shared<ExtensionArray>(data);
 }
 
+const Result<std::shared_ptr<Tensor>> FixedShapeTensorType::GetTensor(
+    const std::shared_ptr<ExtensionScalar>& scalar) const {
+  if (!is_fixed_width(*this->value_type())) {
+    return Status::Invalid("Cannot convert non-fixed-width values to Tensor.");

Review Comment:
   ```suggestion
       return Status::TypeError("Cannot convert non-fixed-width values to 
Tensor.");
   ```



##########
cpp/src/arrow/extension/fixed_shape_tensor.cc:
##########
@@ -340,6 +391,12 @@ Result<std::shared_ptr<DataType>> 
FixedShapeTensorType::Make(
     return Status::Invalid("dim_names size must match shape size. Expected: ",
                            shape.size(), " Got: ", dim_names.size());
   }
+  for (auto i : permutation) {
+    if (i < 0 || i >= static_cast<int64_t>(shape.size())) {
+      return Status::Invalid("permutation indices must be in [0, 
shape.size()). Got: ",
+                             i);
+    }

Review Comment:
   Is that all? Am I allowed to pass `[1,2,2,1]` as permutation?
   



##########
cpp/src/arrow/extension/fixed_shape_tensor_test.cc:
##########
@@ -321,6 +322,70 @@ TEST_F(TestExtensionType, TestFromTensorType) {
   }
 }
 
+void CheckToTensor(const std::vector<int64_t> values, const int32_t cell_size,
+                   const std::vector<int64_t> cell_shape,
+                   const std::vector<int64_t> cell_permutation,
+                   const std::vector<std::string> cell_dim_names,
+                   const std::vector<int64_t> tensor_shape,
+                   const std::vector<std::string> tensor_dim_names,
+                   const std::vector<int64_t> tensor_strides) {
+  auto buffer = Buffer::Wrap(values);
+  const std::shared_ptr<DataType> cell_type = fixed_size_list(int64(), 
cell_size);
+  std::vector<std::shared_ptr<Buffer>> buffers = {nullptr, buffer};
+  auto arr_data = std::make_shared<ArrayData>(int64(), values.size(), buffers);
+  auto arr = std::make_shared<Int64Array>(arr_data);
+  ASSERT_OK_AND_ASSIGN(auto fsla_arr, FixedSizeListArray::FromArrays(arr, 
cell_type));
+
+  ASSERT_OK_AND_ASSIGN(
+      auto expected_tensor,
+      Tensor::Make(int64(), buffer, tensor_shape, tensor_strides, 
tensor_dim_names));
+  const auto ext_type =
+      fixed_shape_tensor(int64(), cell_shape, cell_permutation, 
cell_dim_names);
+
+  auto ext_arr = ExtensionType::WrapArray(ext_type, fsla_arr);
+  const auto tensor_array = 
std::static_pointer_cast<FixedShapeTensorArray>(ext_arr);
+  ASSERT_OK_AND_ASSIGN(const auto actual_tensor, tensor_array->ToTensor());
+
+  ASSERT_EQ(actual_tensor->type(), expected_tensor->type());
+  ASSERT_EQ(actual_tensor->shape(), expected_tensor->shape());
+  ASSERT_EQ(actual_tensor->strides(), expected_tensor->strides());
+  ASSERT_EQ(actual_tensor->dim_names(), expected_tensor->dim_names());
+  ASSERT_TRUE(actual_tensor->data()->Equals(*expected_tensor->data()));
+  ASSERT_TRUE(actual_tensor->Equals(*expected_tensor));
+}
+
+TEST_F(TestExtensionType, ToTensor) {
+  auto cell_sizes = std::vector<int32_t>{12, 12, 12, 12, 6, 6, 18, 18, 18, 18};
+
+  auto cell_shapes =
+      std::vector<std::vector<int64_t>>{{3, 4}, {4, 3}, {4, 3}, {3, 4},    {2, 
3},
+                                        {3, 2}, {3, 6}, {6, 3}, {3, 2, 3}, {3, 
2, 3}};
+  auto tensor_shapes = std::vector<std::vector<int64_t>>{
+      {3, 3, 4}, {3, 3, 4}, {3, 4, 3}, {3, 4, 3},    {6, 2, 3},
+      {6, 2, 3}, {2, 3, 6}, {2, 3, 6}, {2, 3, 2, 3}, {2, 3, 2, 3}};
+
+  auto cell_permutations =
+      std::vector<std::vector<int64_t>>{{0, 1}, {1, 0}, {0, 1}, {1, 0},    {0, 
1},
+                                        {1, 0}, {0, 1}, {1, 0}, {0, 1, 2}, {2, 
1, 0}};
+  auto tensor_strides = std::vector<std::vector<int64_t>>{
+      {96, 32, 8}, {96, 8, 24},  {96, 24, 8},  {96, 8, 32},      {48, 24, 8},
+      {48, 8, 16}, {144, 48, 8}, {144, 8, 24}, {144, 48, 24, 8}, {144, 8, 24, 
48}};
+
+  auto cell_dim_names = std::vector<std::vector<std::string>>{
+      {"y", "z"}, {"z", "y"}, {"y", "z"}, {"z", "y"},      {"y", "z"},
+      {"z", "y"}, {"y", "z"}, {"z", "y"}, {"H", "W", "C"}, {"H", "W", "C"}};
+  auto tensor_dim_names = std::vector<std::vector<std::string>>{
+      {"", "y", "z"},      {"", "y", "z"},     {"", "y", "z"}, {"", "y", "z"},
+      {"", "y", "z"},      {"", "y", "z"},     {"", "y", "z"}, {"", "y", "z"},
+      {"", "H", "W", "C"}, {"", "C", "W", "H"}};
+
+  for (size_t i = 0; i < cell_shapes.size(); i++) {

Review Comment:
   Hmm... what is a "cell"? Can we use a terminology that exists in Arrow?



##########
python/pyarrow/array.pxi:
##########
@@ -3573,17 +3595,19 @@ class FixedShapeTensorArray(ExtensionArray):
           ]
         ]
         """
-        if not obj.flags["C_CONTIGUOUS"]:
-            raise ValueError('The data in the numpy array need to be in a 
single, '
-                             'C-style contiguous segment.')
+
+        permutation = (-np.array(obj.strides)).argsort()
+        if not permutation[0] == 0:

Review Comment:
   Nit
   ```suggestion
           if permutation[0] != 0:
   ```



##########
python/pyarrow/array.pxi:
##########
@@ -3519,17 +3519,42 @@ class FixedShapeTensorArray(ExtensionArray):
 
     def to_numpy_ndarray(self):
         """
-        Convert fixed shape tensor extension array to a numpy array (with 
dim+1).
+        Convert fixed shape tensor extension array to a numpy.ndarray with 
zero copy.
+        First dimension of ndarray will be the length of the fixed shape 
tensor array
+        and the rest of the dimensions will match the permuted shape of the 
fixed
+        shape tensor.
 
-        Note: ``permutation`` should be trivial (``None`` or ``[0, 1, ..., 
len(shape)-1]``).
+
+        Returns
+        -------
+        numpy.ndarray
+            Ndarray representing tensors in the fixed shape tensor array 
concatenated
+            along the first dimension.
         """
-        if self.type.permutation is None or self.type.permutation == 
list(range(len(self.type.shape))):
-            np_flat = np.asarray(self.storage.flatten())
-            numpy_tensor = np_flat.reshape((len(self),) + 
tuple(self.type.shape))
-            return numpy_tensor
-        else:
-            raise ValueError(
-                'Only non-permuted tensors can be converted to numpy tensors.')
+
+        return self.to_tensor().to_numpy()
+
+    def to_tensor(self):
+        """
+        Convert fixed shape tensor extension array to a pyarrow.Tensor with 
zero copy.
+        First dimension of pyarrow.Tensor will be the length of the fixed 
shape tensor
+        array and the rest of the dimensions will match the permuted shape of 
the fixed
+        shape tensor.
+

Review Comment:
   ```suggestion
           Convert fixed shape tensor extension array to a pyarrow.Tensor.
   
           The resulting Tensor will have (ndim + 1) dimensions.
           The size of the first dimension will be the length of the fixed 
shape tensor array
           and the rest of the dimensions will match the permuted shape of the 
fixed
           shape tensor.
           
           The conversion is zero-copy.
   ```



##########
python/pyarrow/array.pxi:
##########
@@ -3519,17 +3519,42 @@ class FixedShapeTensorArray(ExtensionArray):
 
     def to_numpy_ndarray(self):
         """
-        Convert fixed shape tensor extension array to a numpy array (with 
dim+1).
+        Convert fixed shape tensor extension array to a numpy.ndarray with 
zero copy.

Review Comment:
   ```suggestion
           Convert fixed shape tensor extension array to a multi-dimensional 
numpy.ndarray.
           
   ```



##########
cpp/src/arrow/extension/fixed_shape_tensor_test.cc:
##########
@@ -321,6 +322,70 @@ TEST_F(TestExtensionType, TestFromTensorType) {
   }
 }
 
+void CheckToTensor(const std::vector<int64_t> values, const int32_t cell_size,

Review Comment:
   Also can we use something else than `int64_t` as data type? So that not all 
variables here have the same type...



##########
cpp/src/arrow/extension/fixed_shape_tensor_test.cc:
##########
@@ -321,6 +322,70 @@ TEST_F(TestExtensionType, TestFromTensorType) {
   }
 }
 
+void CheckToTensor(const std::vector<int64_t> values, const int32_t cell_size,
+                   const std::vector<int64_t> cell_shape,
+                   const std::vector<int64_t> cell_permutation,
+                   const std::vector<std::string> cell_dim_names,
+                   const std::vector<int64_t> tensor_shape,
+                   const std::vector<std::string> tensor_dim_names,
+                   const std::vector<int64_t> tensor_strides) {
+  auto buffer = Buffer::Wrap(values);
+  const std::shared_ptr<DataType> cell_type = fixed_size_list(int64(), 
cell_size);
+  std::vector<std::shared_ptr<Buffer>> buffers = {nullptr, buffer};
+  auto arr_data = std::make_shared<ArrayData>(int64(), values.size(), buffers);
+  auto arr = std::make_shared<Int64Array>(arr_data);
+  ASSERT_OK_AND_ASSIGN(auto fsla_arr, FixedSizeListArray::FromArrays(arr, 
cell_type));
+
+  ASSERT_OK_AND_ASSIGN(
+      auto expected_tensor,
+      Tensor::Make(int64(), buffer, tensor_shape, tensor_strides, 
tensor_dim_names));
+  const auto ext_type =
+      fixed_shape_tensor(int64(), cell_shape, cell_permutation, 
cell_dim_names);
+
+  auto ext_arr = ExtensionType::WrapArray(ext_type, fsla_arr);
+  const auto tensor_array = 
std::static_pointer_cast<FixedShapeTensorArray>(ext_arr);
+  ASSERT_OK_AND_ASSIGN(const auto actual_tensor, tensor_array->ToTensor());

Review Comment:
   Why is it called `ToTensor` here but `MakeTensor` for scalars?



##########
python/pyarrow/scalar.pxi:
##########
@@ -1027,6 +1027,32 @@ cdef class ExtensionScalar(Scalar):
         return pyarrow_wrap_scalar(<shared_ptr[CScalar]> sp_scalar)
 
 
+cdef class FixedShapeTensorScalar(ExtensionScalar):
+    """
+    Concrete class for fixed shape tensor extension scalar.
+    """
+
+    def to_numpy_ndarray(self):
+        """
+        Convert fixed shape tensor extension scalar to a numpy.ndarray with 
zero copy.

Review Comment:
   ```suggestion
           Convert fixed shape tensor scalar to a numpy.ndarray.
           
           The resulting ndarray's shape matches the permuted shape of the
           fixed shape tensor scalar.
           The conversion is zero-copy.
   
           Returns
           -------
           numpy.ndarray
   ```



##########
python/pyarrow/array.pxi:
##########
@@ -3573,17 +3595,19 @@ class FixedShapeTensorArray(ExtensionArray):
           ]
         ]
         """
-        if not obj.flags["C_CONTIGUOUS"]:
-            raise ValueError('The data in the numpy array need to be in a 
single, '
-                             'C-style contiguous segment.')
+
+        permutation = (-np.array(obj.strides)).argsort()

Review Comment:
   Neat, though I think we want a stable sort in case there are duplicates in 
`strides`:
   ```suggestion
           permutation = (-np.array(obj.strides)).argsort(kind='stable')
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to