This is an automated email from the ASF dual-hosted git repository.
jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 1eec38d833 GH-37574: [Python] Compatibility with numpy 2.0 (#38040)
1eec38d833 is described below
commit 1eec38d833ca57ad826cea57f85d68532172c88c
Author: Thomas Grainger <[email protected]>
AuthorDate: Thu Oct 5 09:24:40 2023 -0700
GH-37574: [Python] Compatibility with numpy 2.0 (#38040)
### What changes are included in this PR?
Support for numpy 2.
Install numpy 2 and pandas 2.2.0.dev0 from scientific-python-nightly-wheels.
### Are these changes tested?
I tested this locally with numpy==2.0.0.dev0 and
pandas==2.2.0.dev0+325.g6c58a217f5
### Are there any user-facing changes?
No
* Closes: #37574
Authored-by: Thomas Grainger <[email protected]>
Signed-off-by: Joris Van den Bossche <[email protected]>
---
ci/scripts/install_pandas.sh | 4 ++--
python/pyarrow/pandas_compat.py | 7 ++++---
python/pyarrow/tests/test_pandas.py | 22 +++++++++++++++-------
python/pyarrow/tests/test_scalars.py | 4 +++-
4 files changed, 24 insertions(+), 13 deletions(-)
diff --git a/ci/scripts/install_pandas.sh b/ci/scripts/install_pandas.sh
index f0cb76fb66..6a506a8651 100755
--- a/ci/scripts/install_pandas.sh
+++ b/ci/scripts/install_pandas.sh
@@ -28,7 +28,7 @@ pandas=$1
numpy=${2:-"latest"}
if [ "${numpy}" = "nightly" ]; then
- pip install --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple --pre numpy
+ pip install --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple --pre numpy
elif [ "${numpy}" = "latest" ]; then
pip install numpy
else
@@ -38,7 +38,7 @@ fi
if [ "${pandas}" = "upstream_devel" ]; then
pip install git+https://github.com/pandas-dev/pandas.git
elif [ "${pandas}" = "nightly" ]; then
- pip install --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple --pre pandas
+ pip install --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple --pre pandas
elif [ "${pandas}" = "latest" ]; then
pip install pandas
else
diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
index 4e5c868efd..e232603ba4 100644
--- a/python/pyarrow/pandas_compat.py
+++ b/python/pyarrow/pandas_compat.py
@@ -31,6 +31,7 @@ import re
import warnings
import numpy as np
+from numpy.core.numerictypes import sctypes as _np_sctypes
import pyarrow as pa
from pyarrow.lib import _pandas_api, frombytes # noqa
@@ -98,7 +99,7 @@ _numpy_logical_type_map = {
np.float32: 'float32',
np.float64: 'float64',
'datetime64[D]': 'date',
- np.unicode_: 'string',
+ np.str_: 'string',
np.bytes_: 'bytes',
}
@@ -780,7 +781,7 @@ def table_to_blockmanager(options, table, categories=None,
# dataframe (complex not included since not supported by Arrow)
_pandas_supported_numpy_types = {
str(np.dtype(typ))
- for typ in (np.sctypes['int'] + np.sctypes['uint'] + np.sctypes['float'] +
+ for typ in (_np_sctypes['int'] + _np_sctypes['uint'] + _np_sctypes['float'] +
['object', 'bool'])
}
@@ -1010,7 +1011,7 @@ _pandas_logical_type_map = {
'date': 'datetime64[D]',
'datetime': 'datetime64[ns]',
'datetimetz': 'datetime64[ns]',
- 'unicode': np.unicode_,
+ 'unicode': np.str_,
'bytes': np.bytes_,
'string': np.str_,
'integer': np.int64,
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index 67502af443..0d01928f44 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -50,6 +50,14 @@ except ImportError:
pass
+try:
+ _np_VisibleDeprecationWarning = np.VisibleDeprecationWarning
+except AttributeError:
+ from numpy.exceptions import (
+ VisibleDeprecationWarning as _np_VisibleDeprecationWarning
+ )
+
+
# Marks all of the tests in this module
pytestmark = pytest.mark.pandas
@@ -706,7 +714,7 @@ class TestConvertPrimitiveTypes:
def test_float_nulls_to_ints(self):
# ARROW-2135
- df = pd.DataFrame({"a": [1.0, 2.0, np.NaN]})
+ df = pd.DataFrame({"a": [1.0, 2.0, np.nan]})
schema = pa.schema([pa.field("a", pa.int16(), nullable=True)])
table = pa.Table.from_pandas(df, schema=schema, safe=False)
assert table[0].to_pylist() == [1, 2, None]
@@ -2329,7 +2337,7 @@ class TestConvertListTypes:
with warnings.catch_warnings():
warnings.filterwarnings("ignore",
"Creating an ndarray from ragged nested",
- np.VisibleDeprecationWarning)
+ _np_VisibleDeprecationWarning)
warnings.filterwarnings("ignore", "elementwise comparison failed",
DeprecationWarning)
tm.assert_series_equal(
@@ -2441,26 +2449,26 @@ class TestConvertListTypes:
np_arr = chunked_arr.to_numpy()
expected = np.array([[1., 2.], [3., 4., 5.], None,
- [6., np.NaN]], dtype="object")
+ [6., np.nan]], dtype="object")
for left, right in zip(np_arr, expected):
if right is None:
assert left == right
else:
npt.assert_array_equal(left, right)
- expected_base = np.array([[1., 2., 3., 4., 5., 6., np.NaN]])
+ expected_base = np.array([[1., 2., 3., 4., 5., 6., np.nan]])
npt.assert_array_equal(np_arr[0].base, expected_base)
np_arr_sliced = chunked_arr.slice(1, 3).to_numpy()
- expected = np.array([[3, 4, 5], None, [6, np.NaN]], dtype="object")
+ expected = np.array([[3, 4, 5], None, [6, np.nan]], dtype="object")
for left, right in zip(np_arr_sliced, expected):
if right is None:
assert left == right
else:
npt.assert_array_equal(left, right)
- expected_base = np.array([[3., 4., 5., 6., np.NaN]])
+ expected_base = np.array([[3., 4., 5., 6., np.nan]])
npt.assert_array_equal(np_arr_sliced[0].base, expected_base)
def test_list_values_behind_null(self):
@@ -2471,7 +2479,7 @@ class TestConvertListTypes:
)
np_arr = arr.to_numpy(zero_copy_only=False)
- expected = np.array([[1., 2.], None, [3., np.NaN]], dtype="object")
+ expected = np.array([[1., 2.], None, [3., np.nan]], dtype="object")
for left, right in zip(np_arr, expected):
if right is None:
assert left == right
diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py
index 8a1dcfb057..1d8d77f50d 100644
--- a/python/pyarrow/tests/test_scalars.py
+++ b/python/pyarrow/tests/test_scalars.py
@@ -204,7 +204,9 @@ def test_numerics():
# float16
s = pa.scalar(np.float16(0.5), type='float16')
assert isinstance(s, pa.HalfFloatScalar)
- assert repr(s) == "<pyarrow.HalfFloatScalar: 0.5>"
+ # on numpy2 repr(np.float16(0.5)) == "np.float16(0.5)"
+ # on numpy1 repr(np.float16(0.5)) == "0.5"
+ assert repr(s) == f"<pyarrow.HalfFloatScalar: {np.float16(0.5)!r}>"
assert str(s) == "0.5"
assert s.as_py() == 0.5