This is an automated email from the ASF dual-hosted git repository. uwe pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push: new 76e8fe9 ARROW-4867: [Python] Respect ordering of columns argument passed to Table.from_pandas 76e8fe9 is described below commit 76e8fe98d9d61a58ed706c448697e8474fabd30f Author: Wes McKinney <wesm+...@apache.org> AuthorDate: Sat Mar 16 17:33:26 2019 +0100 ARROW-4867: [Python] Respect ordering of columns argument passed to Table.from_pandas I read through the discussion on ARROW-3766 where this change was originally made, and I think there was a misunderstanding about a comment I made where I said "The columns argument in Table.from_pandas is just for column filtering". I admit it's a bit ambiguous what's the right thing to do, but it seems like the user intent of passing `columns` is to use that order in the resulting schema, but not error on columns that are not found. We could also introduce "null" type columns for "n [...] Author: Wes McKinney <wesm+...@apache.org> Closes #3930 from wesm/ARROW-4867 and squashes the following commits: 4b4ad64e <Wes McKinney> Respect ordering of columns argument passed to Table.from_pandas --- python/pyarrow/pandas_compat.py | 4 +--- python/pyarrow/tests/test_convert_pandas.py | 9 +++++---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py index 10038de..90a0ad6 100644 --- a/python/pyarrow/pandas_compat.py +++ b/python/pyarrow/pandas_compat.py @@ -411,9 +411,7 @@ def _resolve_columns_of_interest(df, schema, columns): elif schema is not None: columns = schema.names elif columns is not None: - # columns is only for filtering, the function must keep the column - # ordering of either the dataframe or the passed schema - columns = [c for c in df.columns if c in columns] + columns = [c for c in columns if c in df.columns] else: columns = df.columns diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py index 16e2a76..0aab444 100644 --- 
a/python/pyarrow/tests/test_convert_pandas.py +++ b/python/pyarrow/tests/test_convert_pandas.py @@ -140,7 +140,8 @@ class TestConvertMetadata(object): assert table.column(0).name == '0' def test_from_pandas_with_columns(self): - df = pd.DataFrame({0: [1, 2, 3], 1: [1, 3, 3], 2: [2, 4, 5]}) + df = pd.DataFrame({0: [1, 2, 3], 1: [1, 3, 3], 2: [2, 4, 5]}, + columns=[1, 0]) table = pa.Table.from_pandas(df, columns=[0, 1]) expected = pa.Table.from_pandas(df[[0, 1]]) @@ -2495,15 +2496,15 @@ def test_table_from_pandas_columns_argument_only_does_filtering(): columns1 = ['arrays', 'floats', 'partition'] schema1 = pa.schema([ - ('partition', pa.int64()), ('arrays', pa.list_(pa.int64())), ('floats', pa.float64()), + ('partition', pa.int64()) ]) columns2 = ['floats', 'partition'] schema2 = pa.schema([ - ('partition', pa.int64()), - ('floats', pa.float64()) + ('floats', pa.float64()), + ('partition', pa.int64()) ]) table1 = pa.Table.from_pandas(df, columns=columns1, preserve_index=False)