commit python-dask-expr for openSUSE:Factory

Source-Sync Mon, 09 Sep 2024 05:46:07 -0700

Script 'mail_helper' called by obssrc
Hello community,

here is the log from the commit of package python-dask-expr for 
openSUSE:Factory checked in at 2024-09-09 14:44:32
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-dask-expr (Old)
 and      /work/SRC/openSUSE:Factory/.python-dask-expr.new.10096 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


Package is "python-dask-expr"

Mon Sep  9 14:44:32 2024 rev:4 rq:1199616 version:1.1.13

Changes:
--------
--- /work/SRC/openSUSE:Factory/python-dask-expr/python-dask-expr.changes        2024-09-03 13:38:25.276959536 +0200
+++ /work/SRC/openSUSE:Factory/.python-dask-expr.new.10096/python-dask-expr.changes     2024-09-09 14:45:25.193134243 +0200
@@ -1,0 +2,14 @@
+Sun Sep  8 14:24:41 UTC 2024 - Ben Greiner <c...@bnavigator.de>
+
+- Update to 1.1.13
+  * Use task-based rechunking as default (#1131) Hendrik Makait
+  * Improve performance of DelayedsExpr through caching (#1132)
+    Hendrik Makait
+- Release 1.1.12
+  * Fix concat axis 1 bug in divisions (#1128) Patrick Hoefler
+  * Bump pyarrow>=14.0.1 minimum versions (#1127) James
+    Bourbeau
+  * Fix scalar detection of columns coming from sql (#1125)
+    Patrick Hoefler
+
+-------------------------------------------------------------------

Old:
----
  dask_expr-1.1.11-gh.tar.gz

New:
----
  dask_expr-1.1.13-gh.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ python-dask-expr.spec ++++++
--- /var/tmp/diff_new_pack.OQuB9k/_old  2024-09-09 14:45:25.637152714 +0200
+++ /var/tmp/diff_new_pack.OQuB9k/_new  2024-09-09 14:45:25.641152880 +0200
@@ -26,7 +26,7 @@
 %bcond_with test
 %endif
 Name:           python-dask-expr%{psuffix}
-Version:        1.1.11
+Version:        1.1.13
 Release:        0
 Summary:        High Level Expressions for Dask
 License:        BSD-3-Clause
@@ -39,9 +39,9 @@
 BuildRequires:  %{python_module wheel}
 BuildRequires:  fdupes
 BuildRequires:  python-rpm-macros
-Requires:       python-dask = 2024.8.1
+Requires:       python-dask = 2024.8.2
 Requires:       python-pandas >= 2
-Requires:       python-pyarrow >= 7.0.0
+Requires:       python-pyarrow >= 14.0.1
 Provides:       python-dask_expr = %{version}-%{release}
 BuildArch:      noarch
 %if %{with test}
@@ -49,7 +49,7 @@
 BuildRequires:  %{python_module dask-expr = %{version}}
 BuildRequires:  %{python_module distributed}
 BuildRequires:  %{python_module pandas >= 2}
-BuildRequires:  %{python_module pyarrow >= 7.0.0}
+BuildRequires:  %{python_module pyarrow >= 14.0.1}
 BuildRequires:  %{python_module pytest}
 %endif
 %python_subpackages

++++++ dask_expr-1.1.11-gh.tar.gz -> dask_expr-1.1.13-gh.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-expr-1.1.11/changes.md new/dask-expr-1.1.13/changes.md
--- old/dask-expr-1.1.11/changes.md     2024-08-16 23:52:31.000000000 +0200
+++ new/dask-expr-1.1.13/changes.md     2024-09-02 20:09:45.000000000 +0200
@@ -1,5 +1,11 @@
 ## Dask-expr
 
+# v1.1.12
+
+- Fix concat axis 1 bug in divisions (:pr:`1128`) `Patrick Hoefler`_
+- Bump `pyarrow>=14.0.1` minimum versions (:pr:`1127`) `James Bourbeau`_
+- Fix scalar detection of columns coming from sql (:pr:`1125`) `Patrick Hoefler`_
+
 # v1.1.11
 
 - Make split_out for categorical default smarter (:pr:`1124`) `Patrick Hoefler`_
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-expr-1.1.11/ci/environment.yml new/dask-expr-1.1.13/ci/environment.yml
--- old/dask-expr-1.1.11/ci/environment.yml     2024-08-16 23:52:31.000000000 +0200
+++ new/dask-expr-1.1.13/ci/environment.yml     2024-09-02 20:09:45.000000000 +0200
@@ -6,9 +6,10 @@
   - pytest-cov
   - pytest-xdist
   - dask  # overridden by git tip below
-  - pyarrow>=7
+  - pyarrow>=14.0.1
   - pandas>=2
   - pre-commit
+  - sqlalchemy
   - xarray
   - pip:
       - git+https://github.com/dask/distributed
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-expr-1.1.11/ci/environment_released.yml new/dask-expr-1.1.13/ci/environment_released.yml
--- old/dask-expr-1.1.11/ci/environment_released.yml    2024-08-16 23:52:31.000000000 +0200
+++ new/dask-expr-1.1.13/ci/environment_released.yml    2024-09-02 20:09:45.000000000 +0200
@@ -6,6 +6,6 @@
   - pytest-cov
   - pytest-xdist
   - dask
-  - pyarrow>=7
+  - pyarrow>=14.0.1
   - pandas>=2
   - pre-commit
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-expr-1.1.11/dask_expr/_collection.py new/dask-expr-1.1.13/dask_expr/_collection.py
--- old/dask-expr-1.1.11/dask_expr/_collection.py       2024-08-16 23:52:31.000000000 +0200
+++ new/dask-expr-1.1.13/dask_expr/_collection.py       2024-09-02 20:09:45.000000000 +0200
@@ -902,6 +902,8 @@
                 on = list(on)
             elif isinstance(on, str) or isinstance(on, int):
                 on = [on]
+            elif on_index:
+                on = []
             bad_cols = [
                 index_col
                 for index_col in on
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-expr-1.1.11/dask_expr/_concat.py new/dask-expr-1.1.13/dask_expr/_concat.py
--- old/dask-expr-1.1.11/dask_expr/_concat.py   2024-08-16 23:52:31.000000000 +0200
+++ new/dask-expr-1.1.13/dask_expr/_concat.py   2024-09-02 20:09:45.000000000 +0200
@@ -136,7 +136,9 @@
         dfs = self._frames
         if self.axis == 1:
             if self._are_co_alinged_or_single_partition:
-                return ConcatIndexed(self.ignore_order, self._kwargs, self.axis, *dfs)
+                return ConcatIndexed(
+                    self.ignore_order, self._kwargs, self.axis, self.join, *dfs
+                )
 
             elif (
                 all(not df.known_divisions for df in dfs)
@@ -149,7 +151,9 @@
                         " are \n aligned. This assumption is not generally "
                         "safe."
                     )
-                return ConcatUnindexed(self.ignore_order, self._kwargs, self.axis, *dfs)
+                return ConcatUnindexed(
+                    self.ignore_order, self._kwargs, self.axis, self.join, *dfs
+                )
             elif self._all_known_divisions:
                 from dask_expr._repartition import Repartition
 
@@ -338,9 +342,9 @@
 
 
 class ConcatUnindexed(Blockwise):
-    _parameters = ["ignore_order", "_kwargs", "axis"]
-    _defaults = {"ignore_order": False, "_kwargs": {}, "axis": 1}
-    _keyword_only = ["ignore_order", "_kwargs", "axis"]
+    _parameters = ["ignore_order", "_kwargs", "axis", "join"]
+    _defaults = {"ignore_order": False, "_kwargs": {}, "axis": 1, "join": "outer"}
+    _keyword_only = ["ignore_order", "_kwargs", "axis", "join"]
 
     @functools.cached_property
     def _meta(self):
@@ -348,15 +352,19 @@
             [df._meta for df in self.dependencies()],
             ignore_order=self.ignore_order,
             axis=self.axis,
+            join=self.join,
             **self.operand("_kwargs"),
         )
 
     @staticmethod
-    def operation(*args, ignore_order, _kwargs, axis):
+    def operation(*args, ignore_order, _kwargs, axis, join):
         return concat_and_check(args, ignore_order=ignore_order)
 
 
 class ConcatIndexed(ConcatUnindexed):
     @staticmethod
-    def operation(*args, ignore_order, _kwargs, axis):
-        return methods.concat(args, ignore_order=ignore_order, axis=axis)
+    def operation(*args, ignore_order, _kwargs, axis, join):
+        return methods.concat(args, ignore_order=ignore_order, axis=axis, join=join)
+
+    def _broadcast_dep(self, dep: Expr):
+        return dep.npartitions == 1
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-expr-1.1.11/dask_expr/_expr.py new/dask-expr-1.1.13/dask_expr/_expr.py
--- old/dask-expr-1.1.11/dask_expr/_expr.py     2024-08-16 23:52:31.000000000 +0200
+++ new/dask-expr-1.1.13/dask_expr/_expr.py     2024-09-02 20:09:45.000000000 +0200
@@ -2986,7 +2986,7 @@
     def __str__(self):
         return f"{type(self).__name__}({str(self.operands[0])})"
 
-    @property
+    @functools.cached_property
     def _name(self):
         return "delayed-container-" + _tokenize_deterministic(*self.operands)
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-expr-1.1.11/dask_expr/_shuffle.py new/dask-expr-1.1.13/dask_expr/_shuffle.py
--- old/dask-expr-1.1.11/dask_expr/_shuffle.py  2024-08-16 23:52:31.000000000 +0200
+++ new/dask-expr-1.1.13/dask_expr/_shuffle.py  2024-09-02 20:09:45.000000000 +0200
@@ -657,7 +657,7 @@
     """
     return (
         not isinstance(key, Expr)
-        and (np.isscalar(key) or isinstance(key, tuple))
+        and (np.isscalar(key) or pd.api.types.is_scalar(key) or isinstance(key, tuple))
         and key in df.columns
     )
 
@@ -683,7 +683,7 @@
     return (
         index_name is not None
         and not isinstance(key, Expr)
-        and (np.isscalar(key) or isinstance(key, tuple))
+        and (np.isscalar(key) or pd.api.types.is_scalar(key) or isinstance(key, tuple))
         and key == index_name
         and key not in getattr(df, "columns", ())
     )
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-expr-1.1.11/dask_expr/_version.py new/dask-expr-1.1.13/dask_expr/_version.py
--- old/dask-expr-1.1.11/dask_expr/_version.py  2024-08-16 23:52:31.000000000 +0200
+++ new/dask-expr-1.1.13/dask_expr/_version.py  2024-09-02 20:09:45.000000000 +0200
@@ -26,9 +26,9 @@
     # setup.py/versioneer.py will grep for the variable names, so they must
     # each be defined on a line of their own. _version.py will just call
     # get_keywords().
-    git_refnames = " (tag: v1.1.11)"
-    git_full = "b0c980e51525ce49fb29c89711ce4536970e377b"
-    git_date = "2024-08-16 16:52:31 -0500"
+    git_refnames = " (tag: v1.1.13)"
+    git_full = "4e5a635135149ba8b3323d95167438f32a7d84f9"
+    git_date = "2024-09-02 14:09:45 -0400"
     keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
     return keywords
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-expr-1.1.11/dask_expr/array/rechunk.py new/dask-expr-1.1.13/dask_expr/array/rechunk.py
--- old/dask-expr-1.1.11/dask_expr/array/rechunk.py     2024-08-16 23:52:31.000000000 +0200
+++ new/dask-expr-1.1.13/dask_expr/array/rechunk.py     2024-09-02 20:09:45.000000000 +0200
@@ -87,6 +87,9 @@
 
     def _layer(self):
         method = self.method or dask.config.get("array.rechunk.method")
+        # Default to tasks since P2P is not implemented
+        if method is None:
+            method = "tasks"
         if method == "tasks":
             steps = plan_rechunk(
                 self.array.chunks,
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-expr-1.1.11/dask_expr/io/tests/test_sql.py new/dask-expr-1.1.13/dask_expr/io/tests/test_sql.py
--- old/dask-expr-1.1.11/dask_expr/io/tests/test_sql.py 1970-01-01 01:00:00.000000000 +0100
+++ new/dask-expr-1.1.13/dask_expr/io/tests/test_sql.py 2024-09-02 20:09:45.000000000 +0200
@@ -0,0 +1,38 @@
+import pytest
+from dask.utils import tmpfile
+
+from dask_expr import from_pandas, read_sql_table
+from dask_expr.tests._util import _backend_library, assert_eq
+
+pd = _backend_library()
+
+pytest.importorskip("sqlalchemy")
+
+
+def test_shuffle_after_read_sql():
+    with tmpfile() as f:
+        uri = "sqlite:///%s" % f
+
+        df = pd.DataFrame(
+            {
+                "id": [1, 2, 3, 4, 5, 6, 7, 8],
+                "value": [
+                    "value1",
+                    "value2",
+                    "value3",
+                    "value3",
+                    "value4",
+                    "value4",
+                    "value4",
+                    "value5",
+                ],
+            }
+        ).set_index("id")
+        ddf = from_pandas(df, npartitions=1)
+
+        ddf.to_sql("test_table", uri, if_exists="append")
+        result = read_sql_table("test_table", con=uri, index_col="id")
+        assert_eq(
+            result["value"].unique(), pd.Series(df["value"].unique(), name="value")
+        )
+        assert_eq(result.shuffle(on_index=True), df)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-expr-1.1.11/dask_expr/tests/test_concat.py new/dask-expr-1.1.13/dask_expr/tests/test_concat.py
--- old/dask-expr-1.1.11/dask_expr/tests/test_concat.py 2024-08-16 23:52:31.000000000 +0200
+++ new/dask-expr-1.1.13/dask_expr/tests/test_concat.py 2024-09-02 20:09:45.000000000 +0200
@@ -1,7 +1,7 @@
 import numpy as np
 import pytest
 
-from dask_expr import DataFrame, FrameBase, Len, Series, concat, from_pandas
+from dask_expr import DataFrame, FrameBase, Len, Series, concat, from_dict, from_pandas
 from dask_expr.tests._util import _backend_library, assert_eq
 
 # Set DataFrame backend for this module
@@ -349,3 +349,14 @@
     result = concat([df.x, df.y], axis=1)[["x"]]
     expected = pd.concat([pdf.x, pdf.y], axis=1)[["x"]]
     assert_eq(result, expected)
+
+
+@pytest.mark.parametrize("npartitions", [1, 2])
+@pytest.mark.parametrize("join", ["inner", "outer"])
+def test_concat_single_partition_known_divisions(join, npartitions):
+    df1 = from_dict({"a": [1, 2, 3], "b": [1, 2, 3]}, npartitions=npartitions)
+    df2 = from_dict({"c": [1, 2]}, npartitions=npartitions)
+
+    result = concat([df1, df2], axis=1, join=join)
+    expected = pd.concat([df1.compute(), df2.compute()], axis=1, join=join)
+    assert_eq(result, expected)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/dask-expr-1.1.11/pyproject.toml new/dask-expr-1.1.13/pyproject.toml
--- old/dask-expr-1.1.11/pyproject.toml 2024-08-16 23:52:31.000000000 +0200
+++ new/dask-expr-1.1.13/pyproject.toml 2024-09-02 20:09:45.000000000 +0200
@@ -25,8 +25,8 @@
 readme = "README.md"
 requires-python = ">=3.10"
 dependencies = [
-    "dask == 2024.8.1",
-    "pyarrow>=7.0.0",
+    "dask == 2024.8.2",
+    "pyarrow>=14.0.1",
     "pandas >= 2",
 ]

commit python-dask-expr for openSUSE:Factory

Reply via email to