This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 3b3e0fdc61c [SPARK-42621][PS] Add inclusive parameter for pd.date_range 3b3e0fdc61c is described below commit 3b3e0fdc61cf659bb97f94d9b12b8dcdad999e62 Author: Zhyhimont Dmitry <zhyhimon...@profitero.com> AuthorDate: Thu Aug 10 11:05:11 2023 +0900 [SPARK-42621][PS] Add inclusive parameter for pd.date_range ### What changes were proposed in this pull request? Add the `inclusive` parameter to `ps.date_range` to support pandas 2.0.0. ### Why are the changes needed? When pandas 2.0.0 is released, we should match its behavior in the pandas API on Spark. ### Does this PR introduce any user-facing change? Yes, the API changes. Before: ps.date_range(start='2017-01-01', end='2017-01-04', closed=None) After: ps.date_range(start='2017-01-01', end='2017-01-04', inclusive="both") ### How was this patch tested? Unit tests were updated. Closes #40665 from dzhigimont/SPARK-42621_ZH. Lead-authored-by: Zhyhimont Dmitry <zhyhimon...@profitero.com> Co-authored-by: Zhyhimont Dmitry <dzhigim...@gmail.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- python/pyspark/pandas/namespace.py | 32 +++++++++++++++++++++++++-- python/pyspark/pandas/tests/test_namespace.py | 25 +++++++++++++++++++++ 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/python/pyspark/pandas/namespace.py b/python/pyspark/pandas/namespace.py index ba93e5a3ee5..fddf1bec63f 100644 --- a/python/pyspark/pandas/namespace.py +++ b/python/pyspark/pandas/namespace.py @@ -1751,8 +1751,6 @@ def to_datetime( ) -# TODO(SPARK-42621): Add `inclusive` parameter. 
-# See https://github.com/pandas-dev/pandas/issues/40245 def date_range( start: Union[str, Any] = None, end: Union[str, Any] = None, @@ -1761,6 +1759,7 @@ def date_range( tz: Optional[Union[str, tzinfo]] = None, normalize: bool = False, name: Optional[str] = None, + inclusive: str = "both", **kwargs: Any, ) -> DatetimeIndex: """ @@ -1784,6 +1783,11 @@ def date_range( Normalize start/end dates to midnight before generating date range. name : str, default None Name of the resulting DatetimeIndex. + inclusive : {"both", "neither", "left", "right"}, default "both" + Include boundaries; Whether to set each bound as closed or open. + + .. versionadded:: 4.0.0 + **kwargs For compatibility. Has no effect on the result. @@ -1867,6 +1871,29 @@ def date_range( DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31', '2019-01-31'], dtype='datetime64[ns]', freq=None) + + `inclusive` controls whether to include `start` and `end` that are on the + boundary. The default includes boundary points on either end. + + >>> ps.date_range( + ... start='2017-01-01', end='2017-01-04', inclusive="both" + ... ) # doctest: +NORMALIZE_WHITESPACE + DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04'], + dtype='datetime64[ns]', freq=None) + + Use ``inclusive='left'`` to exclude `end` if it falls on the boundary. + + >>> ps.date_range( + ... start='2017-01-01', end='2017-01-04', inclusive='left' + ... ) # doctest: +NORMALIZE_WHITESPACE + DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03'], dtype='datetime64[ns]', freq=None) + + Use ``inclusive='right'`` to exclude `start` if it falls on the boundary. + + >>> ps.date_range( + ... start='2017-01-01', end='2017-01-04', inclusive='right' + ... 
) # doctest: +NORMALIZE_WHITESPACE + DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq=None) """ assert freq not in ["N", "ns"], "nanoseconds is not supported" assert tz is None, "Localized DatetimeIndex is not supported" @@ -1882,6 +1909,7 @@ def date_range( tz=tz, normalize=normalize, name=name, + inclusive=inclusive, **kwargs, ) ), diff --git a/python/pyspark/pandas/tests/test_namespace.py b/python/pyspark/pandas/tests/test_namespace.py index d1d1e1af935..5c202046717 100644 --- a/python/pyspark/pandas/tests/test_namespace.py +++ b/python/pyspark/pandas/tests/test_namespace.py @@ -221,6 +221,31 @@ class NamespaceTestsMixin: pd.date_range(start="1/1/2018", periods=5, freq=pd.offsets.MonthEnd(3)), ) + self.assert_eq( + ps.date_range(start="2017-01-01", end="2017-01-04", inclusive="left"), + pd.date_range(start="2017-01-01", end="2017-01-04", inclusive="left"), + ) + + self.assert_eq( + ps.date_range(start="2017-01-01", end="2017-01-04", inclusive="right"), + pd.date_range(start="2017-01-01", end="2017-01-04", inclusive="right"), + ) + + self.assert_eq( + ps.date_range(start="2017-01-01", end="2017-01-04", inclusive="both"), + pd.date_range(start="2017-01-01", end="2017-01-04", inclusive="both"), + ) + + self.assert_eq( + ps.date_range(start="2017-01-01", end="2017-01-04", inclusive="neither"), + pd.date_range(start="2017-01-01", end="2017-01-04", inclusive="neither"), + ) + + with self.assertRaisesRegex( + ValueError, "Inclusive has to be either 'both', 'neither', 'left' or 'right'" + ): + ps.date_range(start="2017-01-01", end="2017-01-04", inclusive="test") + self.assertRaises( AssertionError, lambda: ps.date_range(start="1/1/2018", periods=5, tz="Asia/Tokyo") ) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org