This is an automated email from the ASF dual-hosted git repository.

rok pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new ba612971b8 GH-48978: [Python] test failures on pandas 3.0 for fastparquet and for zoneinfo w/o pytz (#48979)
ba612971b8 is described below

commit ba612971b8d421c4daf9629eadb6d1e425efa294
Author: tadeja <[email protected]>
AuthorDate: Fri Feb 20 13:06:50 2026 +0100

    GH-48978: [Python] test failures on pandas 3.0 for fastparquet and for zoneinfo w/o pytz (#48979)
    
    ### Rationale for this change
    Closes #48978
    
    ### What changes are included in this PR?
    Update `parquet/test_basic.py::test_fastparquet_cross_compatibility` to account for fastparquet string and categorical dtype differences that caused the failure `Attribute "dtype" are different`.
    Update `test_pandas.py::test_timestamp_as_object_non_nanosecond` to fix the failure `ValueError: fromutc: dt.tzinfo is not self`.
    
    ### Are these changes tested?
    Yes. Initially tested locally with pandas upgraded to 3.0, because CI was still running with a cached pandas 2.3.3.
    
    ### Are there any user-facing changes?
    No.
    * GitHub Issue: #48978
    
    Lead-authored-by: Tadeja Kadunc <[email protected]>
    Co-authored-by: tadeja <[email protected]>
    Co-authored-by: Alenka Frim <[email protected]>
    Co-authored-by: Rok Mihevc <[email protected]>
    Signed-off-by: Rok Mihevc <[email protected]>
---
 python/pyarrow/tests/parquet/test_basic.py | 11 +++++++----
 python/pyarrow/tests/test_pandas.py        |  3 ++-
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/python/pyarrow/tests/parquet/test_basic.py b/python/pyarrow/tests/parquet/test_basic.py
index 345aee3c4e..03fcf2defe 100644
--- a/python/pyarrow/tests/parquet/test_basic.py
+++ b/python/pyarrow/tests/parquet/test_basic.py
@@ -736,6 +736,7 @@ def test_parquet_file_too_small(tempdir):
 @pytest.mark.fastparquet
 @pytest.mark.filterwarnings("ignore:RangeIndex:FutureWarning")
 @pytest.mark.filterwarnings("ignore:tostring:DeprecationWarning:fastparquet")
+@pytest.mark.filterwarnings("ignore:unclosed file:ResourceWarning")
 def test_fastparquet_cross_compatibility(tempdir):
     fp = pytest.importorskip('fastparquet')
 
@@ -759,17 +760,19 @@ def test_fastparquet_cross_compatibility(tempdir):
 
     fp_file = fp.ParquetFile(file_arrow)
     df_fp = fp_file.to_pandas()
-    tm.assert_frame_equal(df, df_fp)
+    # pandas 3 defaults to StringDtype for strings, fastparquet still returns object
+    # TODO: remove astype casts once fastparquet supports pandas 3 StringDtype
+    tm.assert_frame_equal(df_fp, df.astype({"a": object}))
 
     # Fastparquet -> arrow
     file_fastparquet = str(tempdir / "cross_compat_fastparquet.parquet")
-    fp.write(file_fastparquet, df)
+    # fastparquet doesn't support writing pandas 3 StringDtype yet
+    fp.write(file_fastparquet, df.astype({"a": object}))
 
     table_fp = pq.read_pandas(file_fastparquet)
     # for fastparquet written file, categoricals comes back as strings
     # (no arrow schema in parquet metadata)
-    df['f'] = df['f'].astype(object)
-    tm.assert_frame_equal(table_fp.to_pandas(), df)
+    tm.assert_frame_equal(table_fp.to_pandas(), df.astype({"f": object}))
 
 
 @pytest.mark.parametrize('array_factory', [
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index cecf10f216..5fde980dd8 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -24,6 +24,7 @@ import warnings
 
 from collections import OrderedDict
 from datetime import date, datetime, time, timedelta, timezone
+from zoneinfo import ZoneInfo
 
 import hypothesis as h
 import hypothesis.strategies as st
@@ -4956,7 +4957,7 @@ def test_timestamp_as_object_non_nanosecond(resolution, tz, dt):
         assert isinstance(result[0], datetime)
         if tz:
             assert result[0].tzinfo is not None
-            expected = result[0].tzinfo.fromutc(dt)
+            expected = dt.replace(tzinfo=timezone.utc).astimezone(ZoneInfo(tz))
         else:
             assert result[0].tzinfo is None
             expected = dt

Reply via email to