This is an automated email from the ASF dual-hosted git repository.

uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 76e8fe9  ARROW-4867: [Python] Respect ordering of columns argument 
passed to Table.from_pandas
76e8fe9 is described below

commit 76e8fe98d9d61a58ed706c448697e8474fabd30f
Author: Wes McKinney <wesm+...@apache.org>
AuthorDate: Sat Mar 16 17:33:26 2019 +0100

    ARROW-4867: [Python] Respect ordering of columns argument passed to 
Table.from_pandas
    
    I read through the discussion on ARROW-3766 where this change was 
originally made, and I think there was a misunderstanding about a comment I 
made where I said "The columns argument in Table.from_pandas is just for column 
filtering". I admit it's a bit ambiguous what's the right thing to do, but it 
seems like the user intent of passing `columns` is to use that order in the 
resulting schema, but not error on columns that are not found. We could also 
introduce "null" type columns for "n [...]
    
    Author: Wes McKinney <wesm+...@apache.org>
    
    Closes #3930 from wesm/ARROW-4867 and squashes the following commits:
    
    4b4ad64e <Wes McKinney> Respect ordering of columns argument passed to 
Table.from_pandas
---
 python/pyarrow/pandas_compat.py             | 4 +---
 python/pyarrow/tests/test_convert_pandas.py | 9 +++++----
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
index 10038de..90a0ad6 100644
--- a/python/pyarrow/pandas_compat.py
+++ b/python/pyarrow/pandas_compat.py
@@ -411,9 +411,7 @@ def _resolve_columns_of_interest(df, schema, columns):
     elif schema is not None:
         columns = schema.names
     elif columns is not None:
-        # columns is only for filtering, the function must keep the column
-        # ordering of either the dataframe or the passed schema
-        columns = [c for c in df.columns if c in columns]
+        columns = [c for c in columns if c in df.columns]
     else:
         columns = df.columns
 
diff --git a/python/pyarrow/tests/test_convert_pandas.py 
b/python/pyarrow/tests/test_convert_pandas.py
index 16e2a76..0aab444 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -140,7 +140,8 @@ class TestConvertMetadata(object):
         assert table.column(0).name == '0'
 
     def test_from_pandas_with_columns(self):
-        df = pd.DataFrame({0: [1, 2, 3], 1: [1, 3, 3], 2: [2, 4, 5]})
+        df = pd.DataFrame({0: [1, 2, 3], 1: [1, 3, 3], 2: [2, 4, 5]},
+                          columns=[1, 0])
 
         table = pa.Table.from_pandas(df, columns=[0, 1])
         expected = pa.Table.from_pandas(df[[0, 1]])
@@ -2495,15 +2496,15 @@ def 
test_table_from_pandas_columns_argument_only_does_filtering():
 
     columns1 = ['arrays', 'floats', 'partition']
     schema1 = pa.schema([
-        ('partition', pa.int64()),
         ('arrays', pa.list_(pa.int64())),
         ('floats', pa.float64()),
+        ('partition', pa.int64())
     ])
 
     columns2 = ['floats', 'partition']
     schema2 = pa.schema([
-        ('partition', pa.int64()),
-        ('floats', pa.float64())
+        ('floats', pa.float64()),
+        ('partition', pa.int64())
     ])
 
     table1 = pa.Table.from_pandas(df, columns=columns1, preserve_index=False)

Reply via email to