[spark] branch master updated: [SPARK-42621][PS] Add inclusive parameter for pd.date_range

gurwls223 Wed, 09 Aug 2023 19:05:30 -0700

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new 3b3e0fdc61c [SPARK-42621][PS] Add inclusive parameter for pd.date_range
3b3e0fdc61c is described below

commit 3b3e0fdc61cf659bb97f94d9b12b8dcdad999e62
Author: Zhyhimont Dmitry <zhyhimon...@profitero.com>
AuthorDate: Thu Aug 10 11:05:11 2023 +0900

    [SPARK-42621][PS] Add inclusive parameter for pd.date_range
    
    ### What changes were proposed in this pull request?
    Add the `inclusive` parameter to `ps.date_range`, matching `pd.date_range` in pandas 2.0.0.
    
    ### Why are the changes needed?
    When pandas 2.0.0 is released, pandas API on Spark should match its behavior.
    
    ### Does this PR introduce any user-facing change?
    Yes, `ps.date_range` now accepts an `inclusive` parameter.
    
    Before:
    ps.date_range(start='2017-01-01', end='2017-01-04', closed=None)
    
    After:
    ps.date_range(start='2017-01-01', end='2017-01-04', inclusive="both")
    
    ### How was this patch tested?
    Unit tests were updated to cover each `inclusive` value and the invalid-value error.
    
    Closes #40665 from dzhigimont/SPARK-42621_ZH.
    
    Lead-authored-by: Zhyhimont Dmitry <zhyhimon...@profitero.com>
    Co-authored-by: Zhyhimont Dmitry <dzhigim...@gmail.com>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 python/pyspark/pandas/namespace.py            | 32 +++++++++++++++++++++++++--
 python/pyspark/pandas/tests/test_namespace.py | 25 +++++++++++++++++++++
 2 files changed, 55 insertions(+), 2 deletions(-)

diff --git a/python/pyspark/pandas/namespace.py 
b/python/pyspark/pandas/namespace.py
index ba93e5a3ee5..fddf1bec63f 100644
--- a/python/pyspark/pandas/namespace.py
+++ b/python/pyspark/pandas/namespace.py
@@ -1751,8 +1751,6 @@ def to_datetime(
     )
 
 
-# TODO(SPARK-42621): Add `inclusive` parameter.
-# See https://github.com/pandas-dev/pandas/issues/40245
 def date_range(
     start: Union[str, Any] = None,
     end: Union[str, Any] = None,
@@ -1761,6 +1759,7 @@ def date_range(
     tz: Optional[Union[str, tzinfo]] = None,
     normalize: bool = False,
     name: Optional[str] = None,
+    inclusive: str = "both",
     **kwargs: Any,
 ) -> DatetimeIndex:
     """
@@ -1784,6 +1783,11 @@ def date_range(
         Normalize start/end dates to midnight before generating date range.
     name : str, default None
         Name of the resulting DatetimeIndex.
+    inclusive : {"both", "neither", "left", "right"}, default "both"
+        Include boundaries; Whether to set each bound as closed or open.
+
+        .. versionadded:: 4.0.0
+
     **kwargs
         For compatibility. Has no effect on the result.
 
@@ -1867,6 +1871,29 @@ def date_range(
     DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31',
                    '2019-01-31'],
                   dtype='datetime64[ns]', freq=None)
+
+    `inclusive` controls whether to include `start` and `end` that are on the
+    boundary. The default includes boundary points on either end.
+
+    >>> ps.date_range(
+    ...     start='2017-01-01', end='2017-01-04', inclusive="both"
+    ... )  # doctest: +NORMALIZE_WHITESPACE
+    DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04'],
+                   dtype='datetime64[ns]', freq=None)
+
+    Use ``inclusive='left'`` to exclude `end` if it falls on the boundary.
+
+    >>> ps.date_range(
+    ...     start='2017-01-01', end='2017-01-04', inclusive='left'
+    ... )  # doctest: +NORMALIZE_WHITESPACE
+    DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03'], 
dtype='datetime64[ns]', freq=None)
+
+    Use ``inclusive='right'`` to exclude `start` if it falls on the boundary.
+
+    >>> ps.date_range(
+    ...     start='2017-01-01', end='2017-01-04', inclusive='right'
+    ... )  # doctest: +NORMALIZE_WHITESPACE
+    DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'], 
dtype='datetime64[ns]', freq=None)
     """
     assert freq not in ["N", "ns"], "nanoseconds is not supported"
     assert tz is None, "Localized DatetimeIndex is not supported"
@@ -1882,6 +1909,7 @@ def date_range(
                 tz=tz,
                 normalize=normalize,
                 name=name,
+                inclusive=inclusive,
                 **kwargs,
             )
         ),
diff --git a/python/pyspark/pandas/tests/test_namespace.py 
b/python/pyspark/pandas/tests/test_namespace.py
index d1d1e1af935..5c202046717 100644
--- a/python/pyspark/pandas/tests/test_namespace.py
+++ b/python/pyspark/pandas/tests/test_namespace.py
@@ -221,6 +221,31 @@ class NamespaceTestsMixin:
             pd.date_range(start="1/1/2018", periods=5, 
freq=pd.offsets.MonthEnd(3)),
         )
 
+        self.assert_eq(
+            ps.date_range(start="2017-01-01", end="2017-01-04", 
inclusive="left"),
+            pd.date_range(start="2017-01-01", end="2017-01-04", 
inclusive="left"),
+        )
+
+        self.assert_eq(
+            ps.date_range(start="2017-01-01", end="2017-01-04", 
inclusive="right"),
+            pd.date_range(start="2017-01-01", end="2017-01-04", 
inclusive="right"),
+        )
+
+        self.assert_eq(
+            ps.date_range(start="2017-01-01", end="2017-01-04", 
inclusive="both"),
+            pd.date_range(start="2017-01-01", end="2017-01-04", 
inclusive="both"),
+        )
+
+        self.assert_eq(
+            ps.date_range(start="2017-01-01", end="2017-01-04", 
inclusive="neither"),
+            pd.date_range(start="2017-01-01", end="2017-01-04", 
inclusive="neither"),
+        )
+
+        with self.assertRaisesRegex(
+            ValueError, "Inclusive has to be either 'both', 'neither', 'left' 
or 'right'"
+        ):
+            ps.date_range(start="2017-01-01", end="2017-01-04", 
inclusive="test")
+
         self.assertRaises(
             AssertionError, lambda: ps.date_range(start="1/1/2018", periods=5, 
tz="Asia/Tokyo")
         )


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

[spark] branch master updated: [SPARK-42621][PS] Add inclusive parameter for pd.date_range

Reply via email to