This is an automated email from the ASF dual-hosted git repository.
jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new b679a96d42 GH-34283 [Python] Add types_mapper support to index for
to_pandas (#34445)
b679a96d42 is described below
commit b679a96d426f4df1a2d15d452f312c968cdfc8f6
Author: Patrick Hoefler <[email protected]>
AuthorDate: Thu Mar 9 07:21:05 2023 +0000
GH-34283 [Python] Add types_mapper support to index for to_pandas (#34445)
### Rationale for this change
### What changes are included in this PR?
Respect types_mapper for indexes as well, not only for columns.
### Are these changes tested?
Yes
### Are there any user-facing changes?
Technically this is a behavior change: to_pandas now respects the
types_mapper for the index as well.
- [x] closes #34283
cc @ jorisvandenbossche
Authored-by: Patrick Hoefler <[email protected]>
Signed-off-by: Joris Van den Bossche <[email protected]>
---
python/pyarrow/pandas_compat.py | 10 +++++-----
python/pyarrow/tests/test_pandas.py | 18 ++++++++++++++++++
2 files changed, 23 insertions(+), 5 deletions(-)
diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
index d624459ca4..a6de60e87b 100644
--- a/python/pyarrow/pandas_compat.py
+++ b/python/pyarrow/pandas_compat.py
@@ -808,7 +808,7 @@ def table_to_blockmanager(options, table, categories=None,
index_descriptors = pandas_metadata['index_columns']
table = _add_any_metadata(table, pandas_metadata)
table, index = _reconstruct_index(table, index_descriptors,
- all_columns)
+ all_columns, types_mapper)
ext_columns_dtypes = _get_extension_dtypes(
table, all_columns, types_mapper)
else:
@@ -940,7 +940,7 @@ def _deserialize_column_index(block_table, all_columns,
column_indexes):
return columns
-def _reconstruct_index(table, index_descriptors, all_columns):
+def _reconstruct_index(table, index_descriptors, all_columns,
types_mapper=None):
# 0. 'field_name' is the name of the column in the arrow Table
# 1. 'name' is the user-facing name of the column, that is, it came from
# pandas
@@ -959,7 +959,7 @@ def _reconstruct_index(table, index_descriptors,
all_columns):
for descr in index_descriptors:
if isinstance(descr, str):
result_table, index_level, index_name = _extract_index_level(
- table, result_table, descr, field_name_to_metadata)
+ table, result_table, descr, field_name_to_metadata,
types_mapper)
if index_level is None:
# ARROW-1883: the serialized index column was not found
continue
@@ -995,7 +995,7 @@ def _reconstruct_index(table, index_descriptors,
all_columns):
def _extract_index_level(table, result_table, field_name,
- field_name_to_metadata):
+ field_name_to_metadata, types_mapper=None):
logical_name = field_name_to_metadata[field_name]['name']
index_name = _backwards_compatible_index_name(field_name, logical_name)
i = table.schema.get_field_index(field_name)
@@ -1007,7 +1007,7 @@ def _extract_index_level(table, result_table, field_name,
pd = _pandas_api.pd
col = table.column(i)
- values = col.to_pandas().values
+ values = col.to_pandas(types_mapper=types_mapper).values
if hasattr(values, 'flags') and not values.flags.writeable:
# ARROW-1054: in pandas 0.19.2, factorize will reject
diff --git a/python/pyarrow/tests/test_pandas.py
b/python/pyarrow/tests/test_pandas.py
index 4d0ddf8754..2c3c986565 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -4159,6 +4159,24 @@ def
test_roundtrip_empty_table_with_extension_dtype_index():
dtype='object')
+@pytest.mark.parametrize("index", ["a", ["a", "b"]])
+def test_to_pandas_types_mapper_index(index):
+ if Version(pd.__version__) < Version("1.5.0"):
+ pytest.skip("ArrowDtype missing")
+ df = pd.DataFrame(
+ {
+ "a": [1, 2],
+ "b": [3, 4],
+ "c": [5, 6],
+ },
+ dtype=pd.ArrowDtype(pa.int64()),
+ ).set_index(index)
+ expected = df.copy()
+ table = pa.table(df)
+ result = table.to_pandas(types_mapper=pd.ArrowDtype)
+ tm.assert_frame_equal(result, expected)
+
+
def test_array_to_pandas_types_mapper():
# https://issues.apache.org/jira/browse/ARROW-9664
if Version(pd.__version__) < Version("1.2.0"):