Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package python-dask for openSUSE:Factory checked in at 2023-01-06 17:05:35 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-dask (Old) and /work/SRC/openSUSE:Factory/.python-dask.new.1563 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-dask" Fri Jan 6 17:05:35 2023 rev:59 rq:1056245 version:2022.12.1 Changes: -------- --- /work/SRC/openSUSE:Factory/python-dask/python-dask.changes 2022-11-23 09:48:20.543115327 +0100 +++ /work/SRC/openSUSE:Factory/.python-dask.new.1563/python-dask.changes 2023-01-06 17:06:25.276473031 +0100 @@ -1,0 +2,67 @@ +Mon Jan 2 20:44:44 UTC 2023 - Ben Greiner <c...@bnavigator.de> + +- Update to 2022.12.1 + ## Enhancements + * Support dtype_backend="pandas|pyarrow" configuration (GH#9719) + James Bourbeau + * Support cupy.ndarray to cudf.DataFrame dispatching in + dask.dataframe (GH#9579) Richard (Rick) Zamora + * Make filesystem-backend configurable in read_parquet (GH#9699) + Richard (Rick) Zamora + * Serialize all pyarrow extension arrays efficiently (GH#9740) + James Bourbeau + ## Bug Fixes + * Fix bug when repartitioning with tz-aware datetime index + (GH#9741) James Bourbeau + * Partial functions in aggs may have arguments (GH#9724) Irina + Truong + * Add support for simple operation with pyarrow-backed extension + dtypes (GH#9717) James Bourbeau + * Rename columns correctly in case of SeriesGroupby (GH#9716) + Lawrence Mitchell + ## Maintenance + * Add zarr to Python 3.11 CI environment (GH#9771) James Bourbeau + * Add support for Python 3.11 (GH#9708) Thomas Grainger + * Bump actions/checkout from 3.1.0 to 3.2.0 (GH#9753) + * Avoid np.bool8 deprecation warning (GH#9737) James Bourbeau + * Make sure dev packages aren't overwritten in upstream CI build + (GH#9731) James Bourbeau + * Avoid adding data.h5 and mydask.html files during tests + (GH#9726) Thomas Grainger +- Release 2022.12.0 + ## Enhancements + * Remove statistics-based set_index logic from read_parquet + (GH#9661) Richard (Rick) Zamora + * Add support for use_nullable_dtypes to dd.read_parquet + (GH#9617) Ian Rose + * Fix map_overlap in order to accept pandas arguments (GH#9571) + Fabien Aulaire + * Fix pandas 1.5+ FutureWarning in .str.split(..., expand=True) + (GH#9704) Jacob Hayes + * 
Enable column projection for groupby slicing (GH#9667) Richard + (Rick) Zamora + * Support duplicate column cum-functions (GH#9685) Ben + * Improve error message for failed backend dispatch call + (GH#9677) Richard (Rick) Zamora + ## Bug Fixes + * Revise meta creation in arrow parquet engine (GH#9672) Richard + (Rick) Zamora + * Fix da.fft.fft for array-like inputs (GH#9688) James Bourbeau + * Fix groupby -aggregation when grouping on an index by name + (GH#9646) Richard (Rick) Zamora + ## Maintenance + * Avoid PytestReturnNotNoneWarning in test_inheriting_class + (GH#9707) Thomas Grainger + * Fix flaky test_dataframe_aggregations_multilevel (GH#9701) + Richard (Rick) Zamora + * Bump mypy version (GH#9697) crusaderky + * Disable dashboard in test_map_partitions_df_input (GH#9687) + James Bourbeau + * Use latest xarray-contrib/issue-from-pytest-log in upstream + build (GH#9682) James Bourbeau + * xfail ttest_1samp for upstream scipy (GH#9670) James Bourbeau + * Update gpuCI RAPIDS_VER to 23.02 (GH#9678) +- Add dask-pr9777-np1.24.patch gh#dask/dask#9777 +- Move to PEP517 build + +------------------------------------------------------------------- Old: ---- dask-2022.11.1.tar.gz New: ---- dask-2022.12.1.tar.gz dask-pr9777-np1.24.patch ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-dask.spec ++++++ --- /var/tmp/diff_new_pack.2ThyqY/_old 2023-01-06 17:06:26.044477344 +0100 +++ /var/tmp/diff_new_pack.2ThyqY/_new 2023-01-06 17:06:26.048477367 +0100 @@ -1,7 +1,7 @@ # # spec file # -# Copyright (c) 2022 SUSE LLC +# Copyright (c) 2023 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -42,15 +42,20 @@ Name: python-dask%{psuffix} # ===> Note: python-dask MUST be updated in sync with python-distributed! 
<=== -Version: 2022.11.1 +Version: 2022.12.1 Release: 0 Summary: Minimal task scheduling abstraction License: BSD-3-Clause URL: https://dask.org +# SourceRepository: https://github.com/dask/dask Source0: https://files.pythonhosted.org/packages/source/d/dask/dask-%{version}.tar.gz +# PATCH-FIX-UPSTREAM dask-pr9777-np1.24.patch gh#dask/dask#9777 +Patch0: dask-pr9777-np1.24.patch BuildRequires: %{python_module base >= 3.8} BuildRequires: %{python_module packaging >= 20.0} +BuildRequires: %{python_module pip} BuildRequires: %{python_module setuptools} +BuildRequires: %{python_module wheel} BuildRequires: fdupes BuildRequires: python-rpm-macros Requires: python-PyYAML >= 5.3.1 @@ -70,7 +75,6 @@ Recommends: %{name}-dot = %{version} Recommends: python-SQLAlchemy >= 1.4.0 Recommends: python-cityhash -Recommends: python-distributed >= %{version} Recommends: python-fastparquet Recommends: python-gcsfs >= 0.4.0 Recommends: python-murmurhash @@ -327,11 +331,11 @@ chmod a-x dask/dataframe/io/orc/utils.py %build -%python_build +%pyproject_wheel %install %if !%{with test} -%python_install +%pyproject_install %python_clone -a %{buildroot}%{_bindir}/dask %{python_expand # give SUSE specific install instructions sed -E -i '/Please either conda or pip install/,/python -m pip install/ { @@ -373,6 +377,9 @@ donttest+=" or test_select_from_select" # tries to get an IP address donttest+=" or test_map_partitions_df_input" +# more nullcast pandas warnings since numpy 1.24 (see also gh#dask/dask#9793) +donttest+=" or (test_array_core and test_setitem_extended_API_2d_mask)" +donttest+=" or (test_arithmetics_reduction and test_datetime_std)" %pytest --pyargs dask -n auto -r fE -m "not network" -k "not ($donttest)" --reruns 3 --reruns-delay 3 %endif @@ -388,7 +395,7 @@ %license LICENSE.txt %python_alternative %{_bindir}/dask %{python_sitelib}/dask/ -%{python_sitelib}/dask-%{version}*-info +%{python_sitelib}/dask-%{version}.dist-info %exclude %{python_sitelib}/dask/array/ %exclude 
%{python_sitelib}/dask/bag/ %exclude %{python_sitelib}/dask/dataframe/ ++++++ dask-2022.11.1.tar.gz -> dask-2022.12.1.tar.gz ++++++ /work/SRC/openSUSE:Factory/python-dask/dask-2022.11.1.tar.gz /work/SRC/openSUSE:Factory/.python-dask.new.1563/dask-2022.12.1.tar.gz differ: char 5, line 1 ++++++ dask-pr9777-np1.24.patch ++++++ >From c68d1328a7478d4f1326daff0fccd5d0de24e01c Mon Sep 17 00:00:00 2001 From: James Bourbeau <jrbourb...@gmail.com> Date: Tue, 20 Dec 2022 11:32:17 -0600 Subject: [PATCH 1/3] Fix numpy warning in test_pandas_timestamp_overflow_pyarrow --- dask/array/numpy_compat.py | 1 + dask/dataframe/io/tests/test_parquet.py | 15 ++++++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/dask/array/numpy_compat.py b/dask/array/numpy_compat.py index 8cbb5ad3dcf..59bd68f13a9 100644 --- a/dask/array/numpy_compat.py +++ b/dask/array/numpy_compat.py @@ -10,6 +10,7 @@ _numpy_121 = _np_version >= parse_version("1.21.0") _numpy_122 = _np_version >= parse_version("1.22.0") _numpy_123 = _np_version >= parse_version("1.23.0") +_numpy_124 = _np_version >= parse_version("1.24.0") # Taken from scikit-learn: diff --git a/dask/dataframe/io/tests/test_parquet.py b/dask/dataframe/io/tests/test_parquet.py index eed3b83e091..7a6fc3be284 100644 --- a/dask/dataframe/io/tests/test_parquet.py +++ b/dask/dataframe/io/tests/test_parquet.py @@ -1,3 +1,4 @@ +import contextlib import glob import math import os @@ -14,6 +15,7 @@ import dask import dask.dataframe as dd import dask.multiprocessing +from dask.array.numpy_compat import _numpy_124 from dask.blockwise import Blockwise, optimize_blockwise from dask.dataframe._compat import ( PANDAS_GT_110, @@ -3197,9 +3199,16 @@ def test_pandas_metadata_nullable_pyarrow(tmpdir): @PYARROW_MARK def test_pandas_timestamp_overflow_pyarrow(tmpdir): info = np.iinfo(np.dtype("int64")) - arr_numeric = np.linspace( - start=info.min + 2, stop=info.max, num=1024, dtype="int64" - ) + # In `numpy=1.24.0` NumPy warns when an overflow is 
encountered when casting from float to int + # https://numpy.org/doc/stable/release/1.24.0-notes.html#numpy-now-gives-floating-point-errors-in-casts + if _numpy_124: + ctx = pytest.warns(RuntimeWarning, match="invalid value encountered in cast") + else: + ctx = contextlib.nullcontext() + with ctx: + arr_numeric = np.linspace( + start=info.min + 2, stop=info.max, num=1024, dtype="int64" + ) arr_dates = arr_numeric.astype("datetime64[ms]") table = pa.Table.from_arrays([pa.array(arr_dates)], names=["ts"]) >From 7457f47f8202c0478909edc9f9068ae8d00f0cbb Mon Sep 17 00:00:00 2001 From: James Bourbeau <jrbourb...@gmail.com> Date: Tue, 20 Dec 2022 12:29:51 -0600 Subject: [PATCH 2/3] Temporarily avoid crick in Python 3.11 build --- continuous_integration/environment-3.11.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) [removed] >From 03512362608504e30120fa5214d50bb7f969d543 Mon Sep 17 00:00:00 2001 From: James Bourbeau <jrbourb...@gmail.com> Date: Tue, 20 Dec 2022 13:51:40 -0600 Subject: [PATCH 3/3] More --- dask/dataframe/core.py | 8 +++++++- dask/dataframe/tests/test_reshape.py | 24 +++++++++++++++++++++--- dask/diagnostics/tests/test_profiler.py | 12 ++++++++---- 3 files changed, 36 insertions(+), 8 deletions(-) diff --git a/dask/dataframe/core.py b/dask/dataframe/core.py index 19d09597184..20f6b8d0795 100644 --- a/dask/dataframe/core.py +++ b/dask/dataframe/core.py @@ -8140,7 +8140,13 @@ def _convert_to_numeric(series, skipna): def _sqrt_and_convert_to_timedelta(partition, axis, *args, **kwargs): if axis == 1: - return pd.to_timedelta(M.std(partition, axis=axis, *args, **kwargs)) + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + category=RuntimeWarning, + message="invalid value encountered in cast", + ) + return pd.to_timedelta(M.std(partition, axis=axis, *args, **kwargs)) is_df_like, time_cols = kwargs["is_df_like"], kwargs["time_cols"] diff --git a/dask/dataframe/tests/test_reshape.py b/dask/dataframe/tests/test_reshape.py index 
88f9a59587c..a7d800f1c42 100644 --- a/dask/dataframe/tests/test_reshape.py +++ b/dask/dataframe/tests/test_reshape.py @@ -1,3 +1,4 @@ +import contextlib import warnings import numpy as np @@ -109,6 +110,19 @@ def decorator(): return decorator +@contextlib.contextmanager +def ignore_numpy_bool8_deprecation(): + # This warning comes from inside `pandas`. We can't do anything about it, so we ignore the warning. + # Note it's been fixed upstream in `pandas` https://github.com/pandas-dev/pandas/pull/49886. + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + category=DeprecationWarning, + message="`np.bool8` is a deprecated alias for `np.bool_`", + ) + yield + + @check_pandas_issue_45618_warning def test_get_dummies_sparse(): s = pd.Series(pd.Categorical(["a", "b", "a"], categories=["a", "b", "c"])) @@ -116,14 +130,16 @@ def test_get_dummies_sparse(): exp = pd.get_dummies(s, sparse=True) res = dd.get_dummies(ds, sparse=True) - assert_eq(exp, res) + with ignore_numpy_bool8_deprecation(): + assert_eq(exp, res) assert res.compute().a.dtype == "Sparse[uint8, 0]" assert pd.api.types.is_sparse(res.a.compute()) exp = pd.get_dummies(s.to_frame(name="a"), sparse=True) res = dd.get_dummies(ds.to_frame(name="a"), sparse=True) - assert_eq(exp, res) + with ignore_numpy_bool8_deprecation(): + assert_eq(exp, res) assert pd.api.types.is_sparse(res.a_a.compute()) @@ -139,7 +155,9 @@ def test_get_dummies_sparse_mix(): exp = pd.get_dummies(df, sparse=True) res = dd.get_dummies(ddf, sparse=True) - assert_eq(exp, res) + + with ignore_numpy_bool8_deprecation(): + assert_eq(exp, res) assert res.compute().A_a.dtype == "Sparse[uint8, 0]" assert pd.api.types.is_sparse(res.A_a.compute()) diff --git a/dask/diagnostics/tests/test_profiler.py b/dask/diagnostics/tests/test_profiler.py index 9b3c8036a7a..8f9980af76c 100644 --- a/dask/diagnostics/tests/test_profiler.py +++ b/dask/diagnostics/tests/test_profiler.py @@ -369,10 +369,14 @@ def test_saves_file_path_deprecated(): with 
pytest.warns(FutureWarning) as record: prof.visualize(show=False, file_path=fn) - assert len(record) == 1 - assert os.path.exists(fn) - with open(fn) as f: - assert "html" in f.read().lower() + assert 1 <= len(record) <= 2 + assert "file_path keyword argument is deprecated" in str(record[-1].message) + # This additional warning comes from inside `bokeh`. There's a fix upstream + # https://github.com/bokeh/bokeh/pull/12690 so for now we just ignore it. + if len(record) == 2: + assert "`np.bool8` is a deprecated alias for `np.bool_`" in str( + record[0].message + ) @pytest.mark.skipif("not bokeh")