rok commented on code in PR #40354:
URL: https://github.com/apache/arrow/pull/40354#discussion_r3085979408


##########
python/pyarrow/array.pxi:
##########
@@ -4955,6 +4945,321 @@ cdef class Bool8Array(ExtensionArray):
         return Bool8Array.from_storage(storage_arr)
 
 
+def _check_sequence_param(value, ndim, name):
+    if value is None:
+        return False
+    if not isinstance(value, Sequence):
+        raise TypeError(f"{name} must be a tuple or list")
+    if len(value) != ndim:
+        raise ValueError(
+            (f"The length of {name} ({len(value)}) does not match"
+             f" the number of tensor dimensions ({ndim})."))
+    return True
+
+
+def _validate_dim_names(dim_names, ndim):
+    if not _check_sequence_param(dim_names, ndim, "dim_names"):
+        return
+    if not all(isinstance(name, str) for name in dim_names):
+        raise TypeError("Each element of dim_names must be a string")
+
+
+def _validate_permutation(permutation, ndim):
+    if not _check_sequence_param(permutation, ndim, "permutation"):
+        return None
+    normalized = [int(x) for x in permutation]
+    if sorted(normalized) != list(range(ndim)):
+        raise ValueError(
+            "permutation must contain each dimension index exactly once")
+    return normalized
+
+
+def _validate_uniform_shape(uniform_shape, ndim):
+    if not _check_sequence_param(uniform_shape, ndim, "uniform_shape"):
+        return
+    for value in uniform_shape:
+        if value is not None and value < 0:
+            raise ValueError(
+                "uniform_shape must contain non-negative values")
+
+
+def _infer_uniform_shape(shape_rows, ndim):
+    if len(shape_rows) == 0:
+        return None
+    inferred = []
+    for i in range(ndim):
+        axis_size = shape_rows[0][i]
+        if all(shape[i] == axis_size for shape in shape_rows):
+            inferred.append(axis_size)
+        else:
+            inferred.append(None)
+    if all(x is None for x in inferred):
+        return None
+    return inferred
+
+
+def _permutation_from_strides(arr):
+    """Infer the dimension permutation from array strides.
+
+    Note: for arrays with size-1 dimensions, the inferred permutation
+    may be unreliable since size-1 strides are unconstrained. Callers
+    should skip permutation validation for such arrays.
+    """
+    return [int(x) for x in
+            (-np.array(arr.strides, dtype=np.int64)).argsort(kind="stable")]
+
+
+cdef class VariableShapeTensorArray(ExtensionArray):
+    """
+    Concrete class for variable shape tensor extension arrays.
+
+    Examples
+    --------
+    Define the extension type for tensor array
+
+    >>> import pyarrow as pa
+    >>> tensor_type = pa.variable_shape_tensor(pa.float64(), 2)
+
+    Create an extension array
+
+    >>> shapes = pa.array([[2, 3], [1, 2]], pa.list_(pa.int32(), 2))
+    >>> values = pa.array([[1, 2, 3, 4, 5, 6], [7, 8]], pa.list_(pa.float64()))
+    >>> arr = pa.StructArray.from_arrays([values, shapes], names=["data", "shape"])
+    >>> pa.ExtensionArray.from_storage(tensor_type, arr)
+    <pyarrow.lib.VariableShapeTensorArray object at ...>
+    -- is_valid: all not null
+    -- child 0 type: list<item: double>
+      [
+        [
+          1,
+          2,
+          3,
+          4,
+          5,
+          6
+        ],
+        [
+          7,
+          8
+        ]
+      ]
+    -- child 1 type: fixed_size_list<item: int32>[2]
+      [
+        [
+          2,
+          3
+        ],
+        [
+          1,
+          2
+        ]
+      ]
+    """
+
+    @staticmethod
+    def from_numpy_ndarray(obj, dim_names=None, permutation=None, uniform_shape=None,
+                           value_type=None, ndim=None):
+        """
+        Convert a sequence of numpy.ndarrays to a variable shape tensor extension array.
+        The length of the input sequence becomes the length of the output array.
+
+        Parameters
+        ----------
+        obj : Sequence[numpy.ndarray]
+            Sequence of ndarrays with matching dtype, ndim, and memory permutation.
+        dim_names : tuple or list of strings, default None
+            Explicit names to tensor dimensions.
+        permutation : tuple or list of integers, default None
+            Physical permutation for all input arrays. If None, inferred from strides.
+        uniform_shape : tuple or list of integers or None, default None
+            Optional known uniform dimensions in physical order. If None, inferred.
+        value_type : pyarrow.DataType or numpy dtype, default None
+            Optional explicit tensor value type. Required with empty input.
+        ndim : int, default None
+            Optional explicit tensor rank. Required with empty input.
+
+        Returns
+        -------
+        VariableShapeTensorArray
+
+        Examples
+        --------
+        >>> import pyarrow as pa
+        >>> import numpy as np
+        >>> arrays = [np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32),
+        ...           np.array([[7, 8]], dtype=np.int32)]
+        >>> pa.VariableShapeTensorArray.from_numpy_ndarray(arrays)
+        <pyarrow.lib.VariableShapeTensorArray object at ...>
+        ...
+        """
+        cdef:
+            list arrays
+            list shape_rows
+            int array_ndim
+            int i
+            object base_dtype
+            DataType arrow_type
+            list normalized_permutation
+            list permutation_metadata
+            DataType shape_type
+            Array values
+            Array shapes
+            StructArray struct_arr
+            VariableShapeTensorType ext_type
+
+        if isinstance(obj, np.ndarray):
+            raise TypeError("obj must be a sequence of numpy arrays")

Review Comment:
   It sure seems so. Removed the first check.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to