Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package python-dask-expr for openSUSE:Factory checked in at 2024-09-09 14:44:32 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-dask-expr (Old) and /work/SRC/openSUSE:Factory/.python-dask-expr.new.10096 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-dask-expr" Mon Sep 9 14:44:32 2024 rev:4 rq:1199616 version:1.1.13 Changes: -------- --- /work/SRC/openSUSE:Factory/python-dask-expr/python-dask-expr.changes 2024-09-03 13:38:25.276959536 +0200 +++ /work/SRC/openSUSE:Factory/.python-dask-expr.new.10096/python-dask-expr.changes 2024-09-09 14:45:25.193134243 +0200 @@ -1,0 +2,14 @@ +Sun Sep 8 14:24:41 UTC 2024 - Ben Greiner <c...@bnavigator.de> + +- Update to 1.1.13 + * Use task-based rechunking as default (#1131) Hendrik Makait + * Improve performance of DelayedsExpr through caching (#1132) + Hendrik Makait +- Release 1.1.12 + * Fix concat axis 1 bug in divisions (#1128) Patrick Hoefler + * Bump pyarrow>=14.0.1 minimum versions (#1127) James + Bourbeau + * Fix scalar detection of columns coming from sql (#1125) + Patrick Hoefler + +------------------------------------------------------------------- Old: ---- dask_expr-1.1.11-gh.tar.gz New: ---- dask_expr-1.1.13-gh.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-dask-expr.spec ++++++ --- /var/tmp/diff_new_pack.OQuB9k/_old 2024-09-09 14:45:25.637152714 +0200 +++ /var/tmp/diff_new_pack.OQuB9k/_new 2024-09-09 14:45:25.641152880 +0200 @@ -26,7 +26,7 @@ %bcond_with test %endif Name: python-dask-expr%{psuffix} -Version: 1.1.11 +Version: 1.1.13 Release: 0 Summary: High Level Expressions for Dask License: BSD-3-Clause @@ -39,9 +39,9 @@ BuildRequires: %{python_module wheel} BuildRequires: fdupes BuildRequires: python-rpm-macros -Requires: python-dask = 2024.8.1 +Requires: python-dask = 2024.8.2 Requires: python-pandas >= 2 -Requires: python-pyarrow >= 7.0.0 +Requires: python-pyarrow >= 14.0.1 Provides: python-dask_expr = %{version}-%{release} BuildArch: noarch %if %{with test} @@ -49,7 +49,7 @@ BuildRequires: %{python_module dask-expr = %{version}} BuildRequires: %{python_module distributed} BuildRequires: %{python_module pandas >= 2} -BuildRequires: %{python_module pyarrow >= 7.0.0} +BuildRequires: %{python_module pyarrow >= 14.0.1} BuildRequires: %{python_module pytest} %endif %python_subpackages ++++++ dask_expr-1.1.11-gh.tar.gz -> dask_expr-1.1.13-gh.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-expr-1.1.11/changes.md new/dask-expr-1.1.13/changes.md --- old/dask-expr-1.1.11/changes.md 2024-08-16 23:52:31.000000000 +0200 +++ new/dask-expr-1.1.13/changes.md 2024-09-02 20:09:45.000000000 +0200 @@ -1,5 +1,11 @@ ## Dask-expr +# v1.1.12 + +- Fix concat axis 1 bug in divisions (:pr:`1128`) `Patrick Hoefler`_ +- Bump `pyarrow>=14.0.1` minimum versions (:pr:`1127`) `James Bourbeau`_ +- Fix scalar detection of columns coming from sql (:pr:`1125`) `Patrick Hoefler`_ + # v1.1.11 - Make split_out for categorical default smarter (:pr:`1124`) `Patrick Hoefler`_ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-expr-1.1.11/ci/environment.yml new/dask-expr-1.1.13/ci/environment.yml --- old/dask-expr-1.1.11/ci/environment.yml 2024-08-16 23:52:31.000000000 +0200 +++ new/dask-expr-1.1.13/ci/environment.yml 2024-09-02 20:09:45.000000000 +0200 @@ -6,9 +6,10 @@ - pytest-cov - pytest-xdist - dask # overridden by git tip below - - pyarrow>=7 + - pyarrow>=14.0.1 - pandas>=2 - pre-commit + - sqlalchemy - xarray - pip: - git+https://github.com/dask/distributed diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-expr-1.1.11/ci/environment_released.yml new/dask-expr-1.1.13/ci/environment_released.yml --- old/dask-expr-1.1.11/ci/environment_released.yml 2024-08-16 23:52:31.000000000 +0200 +++ new/dask-expr-1.1.13/ci/environment_released.yml 2024-09-02 20:09:45.000000000 +0200 @@ -6,6 +6,6 @@ - pytest-cov - pytest-xdist - dask - - pyarrow>=7 + - pyarrow>=14.0.1 - pandas>=2 - pre-commit diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-expr-1.1.11/dask_expr/_collection.py new/dask-expr-1.1.13/dask_expr/_collection.py --- old/dask-expr-1.1.11/dask_expr/_collection.py 2024-08-16 23:52:31.000000000 +0200 +++ new/dask-expr-1.1.13/dask_expr/_collection.py 2024-09-02 20:09:45.000000000 +0200 @@ -902,6 +902,8 @@ on = list(on) elif isinstance(on, str) or isinstance(on, int): on = [on] + elif on_index: + on = [] bad_cols = [ index_col for index_col in on diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-expr-1.1.11/dask_expr/_concat.py new/dask-expr-1.1.13/dask_expr/_concat.py --- old/dask-expr-1.1.11/dask_expr/_concat.py 2024-08-16 23:52:31.000000000 +0200 +++ new/dask-expr-1.1.13/dask_expr/_concat.py 2024-09-02 20:09:45.000000000 +0200 @@ -136,7 +136,9 @@ dfs = self._frames if self.axis == 1: if self._are_co_alinged_or_single_partition: - return ConcatIndexed(self.ignore_order, self._kwargs, self.axis, *dfs) + return ConcatIndexed( + self.ignore_order, self._kwargs, self.axis, self.join, *dfs + ) elif ( all(not df.known_divisions for df in dfs) @@ -149,7 +151,9 @@ " are \n aligned. This assumption is not generally " "safe." ) - return ConcatUnindexed(self.ignore_order, self._kwargs, self.axis, *dfs) + return ConcatUnindexed( + self.ignore_order, self._kwargs, self.axis, self.join, *dfs + ) elif self._all_known_divisions: from dask_expr._repartition import Repartition @@ -338,9 +342,9 @@ class ConcatUnindexed(Blockwise): - _parameters = ["ignore_order", "_kwargs", "axis"] - _defaults = {"ignore_order": False, "_kwargs": {}, "axis": 1} - _keyword_only = ["ignore_order", "_kwargs", "axis"] + _parameters = ["ignore_order", "_kwargs", "axis", "join"] + _defaults = {"ignore_order": False, "_kwargs": {}, "axis": 1, "join": "outer"} + _keyword_only = ["ignore_order", "_kwargs", "axis", "join"] @functools.cached_property def _meta(self): @@ -348,15 +352,19 @@ [df._meta for df in self.dependencies()], ignore_order=self.ignore_order, axis=self.axis, + join=self.join, **self.operand("_kwargs"), ) @staticmethod - def operation(*args, ignore_order, _kwargs, axis): + def operation(*args, ignore_order, _kwargs, axis, join): return concat_and_check(args, ignore_order=ignore_order) class ConcatIndexed(ConcatUnindexed): @staticmethod - def operation(*args, ignore_order, _kwargs, axis): - return methods.concat(args, ignore_order=ignore_order, axis=axis) + def operation(*args, ignore_order, _kwargs, axis, join): + return methods.concat(args, ignore_order=ignore_order, axis=axis, join=join) + + def _broadcast_dep(self, dep: Expr): + return dep.npartitions == 1 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-expr-1.1.11/dask_expr/_expr.py new/dask-expr-1.1.13/dask_expr/_expr.py --- old/dask-expr-1.1.11/dask_expr/_expr.py 2024-08-16 23:52:31.000000000 +0200 +++ new/dask-expr-1.1.13/dask_expr/_expr.py 2024-09-02 20:09:45.000000000 +0200 @@ -2986,7 +2986,7 @@ def __str__(self): return f"{type(self).__name__}({str(self.operands[0])})" - @property + @functools.cached_property def _name(self): return "delayed-container-" + _tokenize_deterministic(*self.operands) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-expr-1.1.11/dask_expr/_shuffle.py new/dask-expr-1.1.13/dask_expr/_shuffle.py --- old/dask-expr-1.1.11/dask_expr/_shuffle.py 2024-08-16 23:52:31.000000000 +0200 +++ new/dask-expr-1.1.13/dask_expr/_shuffle.py 2024-09-02 20:09:45.000000000 +0200 @@ -657,7 +657,7 @@ """ return ( not isinstance(key, Expr) - and (np.isscalar(key) or isinstance(key, tuple)) + and (np.isscalar(key) or pd.api.types.is_scalar(key) or isinstance(key, tuple)) and key in df.columns ) @@ -683,7 +683,7 @@ return ( index_name is not None and not isinstance(key, Expr) - and (np.isscalar(key) or isinstance(key, tuple)) + and (np.isscalar(key) or pd.api.types.is_scalar(key) or isinstance(key, tuple)) and key == index_name and key not in getattr(df, "columns", ()) ) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-expr-1.1.11/dask_expr/_version.py new/dask-expr-1.1.13/dask_expr/_version.py --- old/dask-expr-1.1.11/dask_expr/_version.py 2024-08-16 23:52:31.000000000 +0200 +++ new/dask-expr-1.1.13/dask_expr/_version.py 2024-09-02 20:09:45.000000000 +0200 @@ -26,9 +26,9 @@ # setup.py/versioneer.py will grep for the variable names, so they must # each be defined on a line of their own. _version.py will just call # get_keywords(). - git_refnames = " (tag: v1.1.11)" - git_full = "b0c980e51525ce49fb29c89711ce4536970e377b" - git_date = "2024-08-16 16:52:31 -0500" + git_refnames = " (tag: v1.1.13)" + git_full = "4e5a635135149ba8b3323d95167438f32a7d84f9" + git_date = "2024-09-02 14:09:45 -0400" keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} return keywords diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-expr-1.1.11/dask_expr/array/rechunk.py new/dask-expr-1.1.13/dask_expr/array/rechunk.py --- old/dask-expr-1.1.11/dask_expr/array/rechunk.py 2024-08-16 23:52:31.000000000 +0200 +++ new/dask-expr-1.1.13/dask_expr/array/rechunk.py 2024-09-02 20:09:45.000000000 +0200 @@ -87,6 +87,9 @@ def _layer(self): method = self.method or dask.config.get("array.rechunk.method") + # Default to tasks since P2P is not implemented + if method is None: + method = "tasks" if method == "tasks": steps = plan_rechunk( self.array.chunks, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-expr-1.1.11/dask_expr/io/tests/test_sql.py new/dask-expr-1.1.13/dask_expr/io/tests/test_sql.py --- old/dask-expr-1.1.11/dask_expr/io/tests/test_sql.py 1970-01-01 01:00:00.000000000 +0100 +++ new/dask-expr-1.1.13/dask_expr/io/tests/test_sql.py 2024-09-02 20:09:45.000000000 +0200 @@ -0,0 +1,38 @@ +import pytest +from dask.utils import tmpfile + +from dask_expr import from_pandas, read_sql_table +from dask_expr.tests._util import _backend_library, assert_eq + +pd = _backend_library() + +pytest.importorskip("sqlalchemy") + + +def test_shuffle_after_read_sql(): + with tmpfile() as f: + uri = "sqlite:///%s" % f + + df = pd.DataFrame( + { + "id": [1, 2, 3, 4, 5, 6, 7, 8], + "value": [ + "value1", + "value2", + "value3", + "value3", + "value4", + "value4", + "value4", + "value5", + ], + } + ).set_index("id") + ddf = from_pandas(df, npartitions=1) + + ddf.to_sql("test_table", uri, if_exists="append") + result = read_sql_table("test_table", con=uri, index_col="id") + assert_eq( + result["value"].unique(), pd.Series(df["value"].unique(), name="value") + ) + assert_eq(result.shuffle(on_index=True), df) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-expr-1.1.11/dask_expr/tests/test_concat.py new/dask-expr-1.1.13/dask_expr/tests/test_concat.py --- old/dask-expr-1.1.11/dask_expr/tests/test_concat.py 2024-08-16 23:52:31.000000000 +0200 +++ new/dask-expr-1.1.13/dask_expr/tests/test_concat.py 2024-09-02 20:09:45.000000000 +0200 @@ -1,7 +1,7 @@ import numpy as np import pytest -from dask_expr import DataFrame, FrameBase, Len, Series, concat, from_pandas +from dask_expr import DataFrame, FrameBase, Len, Series, concat, from_dict, from_pandas from dask_expr.tests._util import _backend_library, assert_eq # Set DataFrame backend for this module @@ -349,3 +349,14 @@ result = concat([df.x, df.y], axis=1)[["x"]] expected = pd.concat([pdf.x, pdf.y], axis=1)[["x"]] assert_eq(result, expected) + + +@pytest.mark.parametrize("npartitions", [1, 2]) +@pytest.mark.parametrize("join", ["inner", "outer"]) +def test_concat_single_partition_known_divisions(join, npartitions): + df1 = from_dict({"a": [1, 2, 3], "b": [1, 2, 3]}, npartitions=npartitions) + df2 = from_dict({"c": [1, 2]}, npartitions=npartitions) + + result = concat([df1, df2], axis=1, join=join) + expected = pd.concat([df1.compute(), df2.compute()], axis=1, join=join) + assert_eq(result, expected) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-expr-1.1.11/pyproject.toml new/dask-expr-1.1.13/pyproject.toml --- old/dask-expr-1.1.11/pyproject.toml 2024-08-16 23:52:31.000000000 +0200 +++ new/dask-expr-1.1.13/pyproject.toml 2024-09-02 20:09:45.000000000 +0200 @@ -25,8 +25,8 @@ readme = "README.md" requires-python = ">=3.10" dependencies = [ - "dask == 2024.8.1", - "pyarrow>=7.0.0", + "dask == 2024.8.2", + "pyarrow>=14.0.1", "pandas >= 2", ]