Script 'mail_helper' called by obssrc
Hello community,
here is the log from the commit of package python-dask-expr for
openSUSE:Factory checked in at 2024-09-09 14:44:32
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-dask-expr (Old)
and /work/SRC/openSUSE:Factory/.python-dask-expr.new.10096 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-dask-expr"
Mon Sep 9 14:44:32 2024 rev:4 rq:1199616 version:1.1.13
Changes:
--------
--- /work/SRC/openSUSE:Factory/python-dask-expr/python-dask-expr.changes
2024-09-03 13:38:25.276959536 +0200
+++
/work/SRC/openSUSE:Factory/.python-dask-expr.new.10096/python-dask-expr.changes
2024-09-09 14:45:25.193134243 +0200
@@ -1,0 +2,14 @@
+Sun Sep 8 14:24:41 UTC 2024 - Ben Greiner <[email protected]>
+
+- Update to 1.1.13
+ * Use task-based rechunking as default (#1131) Hendrik Makait
+ * Improve performance of DelayedsExpr through caching (#1132)
+ Hendrik Makait
+- Release 1.1.12
+ * Fix concat axis 1 bug in divisions (#1128) Patrick Hoefler
+ * Bump pyarrow>=14.0.1 minimum versions (#1127) James
+ Bourbeau
+ * Fix scalar detection of columns coming from sql (#1125)
+ Patrick Hoefler
+
+-------------------------------------------------------------------
Old:
----
dask_expr-1.1.11-gh.tar.gz
New:
----
dask_expr-1.1.13-gh.tar.gz
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Other differences:
------------------
++++++ python-dask-expr.spec ++++++
--- /var/tmp/diff_new_pack.OQuB9k/_old 2024-09-09 14:45:25.637152714 +0200
+++ /var/tmp/diff_new_pack.OQuB9k/_new 2024-09-09 14:45:25.641152880 +0200
@@ -26,7 +26,7 @@
%bcond_with test
%endif
Name: python-dask-expr%{psuffix}
-Version: 1.1.11
+Version: 1.1.13
Release: 0
Summary: High Level Expressions for Dask
License: BSD-3-Clause
@@ -39,9 +39,9 @@
BuildRequires: %{python_module wheel}
BuildRequires: fdupes
BuildRequires: python-rpm-macros
-Requires: python-dask = 2024.8.1
+Requires: python-dask = 2024.8.2
Requires: python-pandas >= 2
-Requires: python-pyarrow >= 7.0.0
+Requires: python-pyarrow >= 14.0.1
Provides: python-dask_expr = %{version}-%{release}
BuildArch: noarch
%if %{with test}
@@ -49,7 +49,7 @@
BuildRequires: %{python_module dask-expr = %{version}}
BuildRequires: %{python_module distributed}
BuildRequires: %{python_module pandas >= 2}
-BuildRequires: %{python_module pyarrow >= 7.0.0}
+BuildRequires: %{python_module pyarrow >= 14.0.1}
BuildRequires: %{python_module pytest}
%endif
%python_subpackages
++++++ dask_expr-1.1.11-gh.tar.gz -> dask_expr-1.1.13-gh.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/dask-expr-1.1.11/changes.md
new/dask-expr-1.1.13/changes.md
--- old/dask-expr-1.1.11/changes.md 2024-08-16 23:52:31.000000000 +0200
+++ new/dask-expr-1.1.13/changes.md 2024-09-02 20:09:45.000000000 +0200
@@ -1,5 +1,11 @@
## Dask-expr
+# v1.1.12
+
+- Fix concat axis 1 bug in divisions (:pr:`1128`) `Patrick Hoefler`_
+- Bump `pyarrow>=14.0.1` minimum versions (:pr:`1127`) `James Bourbeau`_
+- Fix scalar detection of columns coming from sql (:pr:`1125`) `Patrick Hoefler`_
+
# v1.1.11
- Make split_out for categorical default smarter (:pr:`1124`) `Patrick Hoefler`_
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/dask-expr-1.1.11/ci/environment.yml
new/dask-expr-1.1.13/ci/environment.yml
--- old/dask-expr-1.1.11/ci/environment.yml 2024-08-16 23:52:31.000000000
+0200
+++ new/dask-expr-1.1.13/ci/environment.yml 2024-09-02 20:09:45.000000000
+0200
@@ -6,9 +6,10 @@
- pytest-cov
- pytest-xdist
- dask # overridden by git tip below
- - pyarrow>=7
+ - pyarrow>=14.0.1
- pandas>=2
- pre-commit
+ - sqlalchemy
- xarray
- pip:
- git+https://github.com/dask/distributed
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/dask-expr-1.1.11/ci/environment_released.yml
new/dask-expr-1.1.13/ci/environment_released.yml
--- old/dask-expr-1.1.11/ci/environment_released.yml 2024-08-16
23:52:31.000000000 +0200
+++ new/dask-expr-1.1.13/ci/environment_released.yml 2024-09-02
20:09:45.000000000 +0200
@@ -6,6 +6,6 @@
- pytest-cov
- pytest-xdist
- dask
- - pyarrow>=7
+ - pyarrow>=14.0.1
- pandas>=2
- pre-commit
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/dask-expr-1.1.11/dask_expr/_collection.py
new/dask-expr-1.1.13/dask_expr/_collection.py
--- old/dask-expr-1.1.11/dask_expr/_collection.py 2024-08-16
23:52:31.000000000 +0200
+++ new/dask-expr-1.1.13/dask_expr/_collection.py 2024-09-02
20:09:45.000000000 +0200
@@ -902,6 +902,8 @@
on = list(on)
elif isinstance(on, str) or isinstance(on, int):
on = [on]
+ elif on_index:
+ on = []
bad_cols = [
index_col
for index_col in on
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/dask-expr-1.1.11/dask_expr/_concat.py
new/dask-expr-1.1.13/dask_expr/_concat.py
--- old/dask-expr-1.1.11/dask_expr/_concat.py 2024-08-16 23:52:31.000000000
+0200
+++ new/dask-expr-1.1.13/dask_expr/_concat.py 2024-09-02 20:09:45.000000000
+0200
@@ -136,7 +136,9 @@
dfs = self._frames
if self.axis == 1:
if self._are_co_alinged_or_single_partition:
- return ConcatIndexed(self.ignore_order, self._kwargs, self.axis, *dfs)
+ return ConcatIndexed(
+ self.ignore_order, self._kwargs, self.axis, self.join, *dfs
+ )
elif (
all(not df.known_divisions for df in dfs)
@@ -149,7 +151,9 @@
" are \n aligned. This assumption is not generally "
"safe."
)
- return ConcatUnindexed(self.ignore_order, self._kwargs, self.axis, *dfs)
+ return ConcatUnindexed(
+ self.ignore_order, self._kwargs, self.axis, self.join, *dfs
+ )
elif self._all_known_divisions:
from dask_expr._repartition import Repartition
@@ -338,9 +342,9 @@
class ConcatUnindexed(Blockwise):
- _parameters = ["ignore_order", "_kwargs", "axis"]
- _defaults = {"ignore_order": False, "_kwargs": {}, "axis": 1}
- _keyword_only = ["ignore_order", "_kwargs", "axis"]
+ _parameters = ["ignore_order", "_kwargs", "axis", "join"]
+ _defaults = {"ignore_order": False, "_kwargs": {}, "axis": 1, "join": "outer"}
+ _keyword_only = ["ignore_order", "_kwargs", "axis", "join"]
@functools.cached_property
def _meta(self):
@@ -348,15 +352,19 @@
[df._meta for df in self.dependencies()],
ignore_order=self.ignore_order,
axis=self.axis,
+ join=self.join,
**self.operand("_kwargs"),
)
@staticmethod
- def operation(*args, ignore_order, _kwargs, axis):
+ def operation(*args, ignore_order, _kwargs, axis, join):
return concat_and_check(args, ignore_order=ignore_order)
class ConcatIndexed(ConcatUnindexed):
@staticmethod
- def operation(*args, ignore_order, _kwargs, axis):
- return methods.concat(args, ignore_order=ignore_order, axis=axis)
+ def operation(*args, ignore_order, _kwargs, axis, join):
+ return methods.concat(args, ignore_order=ignore_order, axis=axis, join=join)
+
+ def _broadcast_dep(self, dep: Expr):
+ return dep.npartitions == 1
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/dask-expr-1.1.11/dask_expr/_expr.py
new/dask-expr-1.1.13/dask_expr/_expr.py
--- old/dask-expr-1.1.11/dask_expr/_expr.py 2024-08-16 23:52:31.000000000
+0200
+++ new/dask-expr-1.1.13/dask_expr/_expr.py 2024-09-02 20:09:45.000000000
+0200
@@ -2986,7 +2986,7 @@
def __str__(self):
return f"{type(self).__name__}({str(self.operands[0])})"
- @property
+ @functools.cached_property
def _name(self):
return "delayed-container-" + _tokenize_deterministic(*self.operands)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/dask-expr-1.1.11/dask_expr/_shuffle.py
new/dask-expr-1.1.13/dask_expr/_shuffle.py
--- old/dask-expr-1.1.11/dask_expr/_shuffle.py 2024-08-16 23:52:31.000000000
+0200
+++ new/dask-expr-1.1.13/dask_expr/_shuffle.py 2024-09-02 20:09:45.000000000
+0200
@@ -657,7 +657,7 @@
"""
return (
not isinstance(key, Expr)
- and (np.isscalar(key) or isinstance(key, tuple))
+ and (np.isscalar(key) or pd.api.types.is_scalar(key) or isinstance(key, tuple))
and key in df.columns
)
@@ -683,7 +683,7 @@
return (
index_name is not None
and not isinstance(key, Expr)
- and (np.isscalar(key) or isinstance(key, tuple))
+ and (np.isscalar(key) or pd.api.types.is_scalar(key) or isinstance(key, tuple))
and key == index_name
and key not in getattr(df, "columns", ())
)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/dask-expr-1.1.11/dask_expr/_version.py
new/dask-expr-1.1.13/dask_expr/_version.py
--- old/dask-expr-1.1.11/dask_expr/_version.py 2024-08-16 23:52:31.000000000
+0200
+++ new/dask-expr-1.1.13/dask_expr/_version.py 2024-09-02 20:09:45.000000000
+0200
@@ -26,9 +26,9 @@
# setup.py/versioneer.py will grep for the variable names, so they must
# each be defined on a line of their own. _version.py will just call
# get_keywords().
- git_refnames = " (tag: v1.1.11)"
- git_full = "b0c980e51525ce49fb29c89711ce4536970e377b"
- git_date = "2024-08-16 16:52:31 -0500"
+ git_refnames = " (tag: v1.1.13)"
+ git_full = "4e5a635135149ba8b3323d95167438f32a7d84f9"
+ git_date = "2024-09-02 14:09:45 -0400"
keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
return keywords
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/dask-expr-1.1.11/dask_expr/array/rechunk.py
new/dask-expr-1.1.13/dask_expr/array/rechunk.py
--- old/dask-expr-1.1.11/dask_expr/array/rechunk.py 2024-08-16
23:52:31.000000000 +0200
+++ new/dask-expr-1.1.13/dask_expr/array/rechunk.py 2024-09-02
20:09:45.000000000 +0200
@@ -87,6 +87,9 @@
def _layer(self):
method = self.method or dask.config.get("array.rechunk.method")
+ # Default to tasks since P2P is not implemented
+ if method is None:
+ method = "tasks"
if method == "tasks":
steps = plan_rechunk(
self.array.chunks,
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/dask-expr-1.1.11/dask_expr/io/tests/test_sql.py
new/dask-expr-1.1.13/dask_expr/io/tests/test_sql.py
--- old/dask-expr-1.1.11/dask_expr/io/tests/test_sql.py 1970-01-01
01:00:00.000000000 +0100
+++ new/dask-expr-1.1.13/dask_expr/io/tests/test_sql.py 2024-09-02
20:09:45.000000000 +0200
@@ -0,0 +1,38 @@
+import pytest
+from dask.utils import tmpfile
+
+from dask_expr import from_pandas, read_sql_table
+from dask_expr.tests._util import _backend_library, assert_eq
+
+pd = _backend_library()
+
+pytest.importorskip("sqlalchemy")
+
+
+def test_shuffle_after_read_sql():
+ with tmpfile() as f:
+ uri = "sqlite:///%s" % f
+
+ df = pd.DataFrame(
+ {
+ "id": [1, 2, 3, 4, 5, 6, 7, 8],
+ "value": [
+ "value1",
+ "value2",
+ "value3",
+ "value3",
+ "value4",
+ "value4",
+ "value4",
+ "value5",
+ ],
+ }
+ ).set_index("id")
+ ddf = from_pandas(df, npartitions=1)
+
+ ddf.to_sql("test_table", uri, if_exists="append")
+ result = read_sql_table("test_table", con=uri, index_col="id")
+ assert_eq(
+ result["value"].unique(), pd.Series(df["value"].unique(), name="value")
+ )
+ assert_eq(result.shuffle(on_index=True), df)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/dask-expr-1.1.11/dask_expr/tests/test_concat.py
new/dask-expr-1.1.13/dask_expr/tests/test_concat.py
--- old/dask-expr-1.1.11/dask_expr/tests/test_concat.py 2024-08-16
23:52:31.000000000 +0200
+++ new/dask-expr-1.1.13/dask_expr/tests/test_concat.py 2024-09-02
20:09:45.000000000 +0200
@@ -1,7 +1,7 @@
import numpy as np
import pytest
-from dask_expr import DataFrame, FrameBase, Len, Series, concat, from_pandas
+from dask_expr import DataFrame, FrameBase, Len, Series, concat, from_dict, from_pandas
from dask_expr.tests._util import _backend_library, assert_eq
# Set DataFrame backend for this module
@@ -349,3 +349,14 @@
result = concat([df.x, df.y], axis=1)[["x"]]
expected = pd.concat([pdf.x, pdf.y], axis=1)[["x"]]
assert_eq(result, expected)
+
+
[email protected]("npartitions", [1, 2])
[email protected]("join", ["inner", "outer"])
+def test_concat_single_partition_known_divisions(join, npartitions):
+ df1 = from_dict({"a": [1, 2, 3], "b": [1, 2, 3]}, npartitions=npartitions)
+ df2 = from_dict({"c": [1, 2]}, npartitions=npartitions)
+
+ result = concat([df1, df2], axis=1, join=join)
+ expected = pd.concat([df1.compute(), df2.compute()], axis=1, join=join)
+ assert_eq(result, expected)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn'
'--exclude=.svnignore' old/dask-expr-1.1.11/pyproject.toml
new/dask-expr-1.1.13/pyproject.toml
--- old/dask-expr-1.1.11/pyproject.toml 2024-08-16 23:52:31.000000000 +0200
+++ new/dask-expr-1.1.13/pyproject.toml 2024-09-02 20:09:45.000000000 +0200
@@ -25,8 +25,8 @@
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
- "dask == 2024.8.1",
- "pyarrow>=7.0.0",
+ "dask == 2024.8.2",
+ "pyarrow>=14.0.1",
"pandas >= 2",
]