This is an automated email from the ASF dual-hosted git repository.
jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 5c772af5c6 GH-35845: [CI][Python] Fix usage of assert_frame_equal in test_hdfs.py (#35842)
5c772af5c6 is described below
commit 5c772af5c6684dfcd70fc9bf7f7e96a98b93c580
Author: Joris Van den Bossche <[email protected]>
AuthorDate: Wed May 31 16:02:31 2023 +0200
GH-35845: [CI][Python] Fix usage of assert_frame_equal in test_hdfs.py (#35842)
_add empty comment_
* Closes: #35845
Authored-by: Joris Van den Bossche <[email protected]>
Signed-off-by: Joris Van den Bossche <[email protected]>
---
python/pyarrow/pandas-shim.pxi | 4 ----
python/pyarrow/tests/test_hdfs.py | 27 +++++++++++++++------------
2 files changed, 15 insertions(+), 16 deletions(-)
diff --git a/python/pyarrow/pandas-shim.pxi b/python/pyarrow/pandas-shim.pxi
index f7e0ceccbd..7dc5d590a7 100644
--- a/python/pyarrow/pandas-shim.pxi
+++ b/python/pyarrow/pandas-shim.pxi
@@ -225,10 +225,6 @@ cdef class _PandasAPIShim(object):
return obj.array
return obj.values
- def assert_frame_equal(self, *args, **kwargs):
- self._check_import()
- return self._pd.util.testing.assert_frame_equal
-
def get_rangeindex_attribute(self, level, name):
# public start/stop/step attributes added in pandas 0.25.0
self._check_import()
diff --git a/python/pyarrow/tests/test_hdfs.py b/python/pyarrow/tests/test_hdfs.py
index c71353b45f..1f5f10f7d9 100644
--- a/python/pyarrow/tests/test_hdfs.py
+++ b/python/pyarrow/tests/test_hdfs.py
@@ -26,7 +26,6 @@ import numpy as np
import pytest
import pyarrow as pa
-from pyarrow.pandas_compat import _pandas_api
from pyarrow.tests import util
from pyarrow.tests.parquet.common import _test_dataframe
from pyarrow.tests.parquet.test_dataset import (
@@ -35,6 +34,12 @@ from pyarrow.tests.parquet.test_dataset import (
)
from pyarrow.util import guid
+try:
+ from pandas.testing import assert_frame_equal
+except ImportError:
+ pass
+
+
# ----------------------------------------------------------------------
# HDFS tests
@@ -317,10 +322,10 @@ class HdfsTestCases:
expected = self._write_multiple_hdfs_pq_files(tmpdir)
result = self.hdfs.read_parquet(tmpdir)
- _pandas_api.assert_frame_equal(result.to_pandas()
- .sort_values(by='index')
- .reset_index(drop=True),
- expected.to_pandas())
+ assert_frame_equal(
+ result.to_pandas().sort_values(by='index').reset_index(drop=True),
+ expected.to_pandas()
+ )
@pytest.mark.pandas
@pytest.mark.parquet
@@ -335,10 +340,10 @@ class HdfsTestCases:
path = _get_hdfs_uri(tmpdir)
result = pq.read_table(path)
- _pandas_api.assert_frame_equal(result.to_pandas()
- .sort_values(by='index')
- .reset_index(drop=True),
- expected.to_pandas())
+ assert_frame_equal(
+ result.to_pandas().sort_values(by='index').reset_index(drop=True),
+ expected.to_pandas()
+ )
@pytest.mark.pandas
@pytest.mark.parquet
@@ -361,7 +366,7 @@ class HdfsTestCases:
path, filesystem=self.hdfs, use_legacy_dataset=True
).to_pandas()
- _pandas_api.assert_frame_equal(result, df)
+ assert_frame_equal(result, df)
@pytest.mark.parquet
@pytest.mark.pandas
@@ -420,8 +425,6 @@ def _get_hdfs_uri(path):
@pytest.mark.parquet
@pytest.mark.fastparquet
def test_fastparquet_read_with_hdfs():
- from pandas.testing import assert_frame_equal
-
check_libhdfs_present()
try:
import snappy # noqa