This is an automated email from the ASF dual-hosted git repository.

kszucs pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 2b9e1e4  ARROW-45: [Python] Add unnest/flatten function for List types
2b9e1e4 is described below

commit 2b9e1e45c45ee411032212affaafb6e32a1bffd8
Author: Krisztián Szűcs <[email protected]>
AuthorDate: Wed Oct 17 13:56:11 2018 +0200

    ARROW-45: [Python] Add unnest/flatten function for List types
    
    Author: Krisztián Szűcs <[email protected]>
    
    Closes #2757 from kszucs/ARROW-45 and squashes the following commits:
    
    0420020a <Krisztián Szűcs> remove Flatten from cpp API
    3aabaf72 <Krisztián Szűcs> lint
    c2f71f1b <Krisztián Szűcs> small docstring
    bbd42472 <Krisztián Szűcs> ListArray::Flatten
---
 python/pyarrow/array.pxi           | 11 ++++++++
 python/pyarrow/tests/test_array.py | 51 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+)

diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 320852a..2d0f56d 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -937,6 +937,17 @@ cdef class ListArray(Array):
                                                cpool, &out))
         return pyarrow_wrap_array(out)
 
+    def flatten(self):
+        """
+        Unnest this ListArray by one level
+
+        Returns
+        -------
+        result : Array
+        """
+        cdef CListArray* arr = <CListArray*> self.ap
+        return pyarrow_wrap_array(arr.values())
+
 
 cdef class UnionArray(Array):
 
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index 65deddc..c340228 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -1090,6 +1090,57 @@ def test_invalid_tensor_construction():
         pa.Tensor()
 
 
+def test_list_array_flatten():
+    typ2 = pa.list_(
+        pa.list_(
+            pa.int64()
+        )
+    )
+    arr2 = pa.array([
+        None,
+        [
+            [1, None, 2],
+            None,
+            [3, 4]
+        ],
+        [],
+        [
+            [],
+            [5, 6],
+            None
+        ],
+        [
+            [7, 8]
+        ]
+    ])
+    assert arr2.type.equals(typ2)
+
+    typ1 = pa.list_(pa.int64())
+    arr1 = pa.array([
+        [1, None, 2],
+        None,
+        [3, 4],
+        [],
+        [5, 6],
+        None,
+        [7, 8]
+    ])
+    assert arr1.type.equals(typ1)
+
+    typ0 = pa.int64()
+    arr0 = pa.array([
+        1, None, 2,
+        3, 4,
+        5, 6,
+        7, 8
+    ])
+    assert arr0.type.equals(typ0)
+
+    assert arr2.flatten().equals(arr1)
+    assert arr1.flatten().equals(arr0)
+    assert arr2.flatten().flatten().equals(arr0)
+
+
 def test_struct_array_flatten():
     ty = pa.struct([pa.field('x', pa.int16()),
                     pa.field('y', pa.float32())])

Reply via email to