This is an automated email from the ASF dual-hosted git repository. ruifengz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 383d9fb4df8 [SPARK-43680][SPARK-43681][SPARK-43682][SPARK-43683][PS] Fix `NullOps` for Spark Connect 383d9fb4df8 is described below commit 383d9fb4df81429d2d7d31a35f200ff152bf77f6 Author: itholic <haejoon....@databricks.com> AuthorDate: Mon May 29 19:38:27 2023 +0800 [SPARK-43680][SPARK-43681][SPARK-43682][SPARK-43683][PS] Fix `NullOps` for Spark Connect ### What changes were proposed in this pull request? This PR proposes to fix the `NullOps` tests for pandas API on Spark with Spark Connect. This includes SPARK-43680, SPARK-43681, SPARK-43682, SPARK-43683 at once, because they are all related, similar modifications in a single file. ### Why are the changes needed? To support all features for pandas API on Spark with Spark Connect. ### Does this PR introduce _any_ user-facing change? Yes, `NullOps.lt`, `NullOps.le`, `NullOps.ge`, `NullOps.gt` are now working as expected on Spark Connect. ### How was this patch tested? Uncommented the UTs, and tested manually. Closes #41361 from itholic/SPARK-43680-3. 
Authored-by: itholic <haejoon....@databricks.com> Signed-off-by: Ruifeng Zheng <ruife...@apache.org> --- python/pyspark/pandas/data_type_ops/null_ops.py | 34 +++++++++++++--------- .../connect/data_type_ops/test_parity_null_ops.py | 16 ---------- 2 files changed, 21 insertions(+), 29 deletions(-) diff --git a/python/pyspark/pandas/data_type_ops/null_ops.py b/python/pyspark/pandas/data_type_ops/null_ops.py index 9205d5e2407..ddd7bddcfbd 100644 --- a/python/pyspark/pandas/data_type_ops/null_ops.py +++ b/python/pyspark/pandas/data_type_ops/null_ops.py @@ -30,8 +30,8 @@ from pyspark.pandas.data_type_ops.base import ( ) from pyspark.pandas._typing import SeriesOrIndex from pyspark.pandas.typedef import pandas_on_spark_type -from pyspark.sql import Column from pyspark.sql.types import BooleanType, StringType +from pyspark.sql.utils import pyspark_column_op, is_remote class NullOps(DataTypeOps): @@ -44,28 +44,36 @@ class NullOps(DataTypeOps): return "nulls" def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: - from pyspark.pandas.base import column_op - _sanitize_list_like(right) - return column_op(Column.__lt__)(left, right) + result = pyspark_column_op("__lt__")(left, right) + if is_remote(): + # In Spark Connect, it returns None instead of False, so we manually cast it. + result = result.fillna(False) + return result def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: - from pyspark.pandas.base import column_op - _sanitize_list_like(right) - return column_op(Column.__le__)(left, right) + result = pyspark_column_op("__le__")(left, right) + if is_remote(): + # In Spark Connect, it returns None instead of False, so we manually cast it. 
+ result = result.fillna(False) + return result def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: - from pyspark.pandas.base import column_op - _sanitize_list_like(right) - return column_op(Column.__ge__)(left, right) + result = pyspark_column_op("__ge__")(left, right) + if is_remote(): + # In Spark Connect, it returns None instead of False, so we manually cast it. + result = result.fillna(False) + return result def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: - from pyspark.pandas.base import column_op - _sanitize_list_like(right) - return column_op(Column.__gt__)(left, right) + result = pyspark_column_op("__gt__")(left, right) + if is_remote(): + # In Spark Connect, it returns None instead of False, so we manually cast it. + result = result.fillna(False) + return result def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype]) -> IndexOpsLike: dtype, spark_type = pandas_on_spark_type(dtype) diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py index 00bfb75087a..1b53a064971 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/test_parity_null_ops.py @@ -33,22 +33,6 @@ class NullOpsParityTests( def test_eq(self): super().test_eq() - @unittest.skip("TODO(SPARK-43680): Fix NullOps.ge to work with Spark Connect Column.") - def test_ge(self): - super().test_ge() - - @unittest.skip("TODO(SPARK-43681): Fix NullOps.gt to work with Spark Connect Column.") - def test_gt(self): - super().test_gt() - - @unittest.skip("TODO(SPARK-43682): Fix NullOps.le to work with Spark Connect Column.") - def test_le(self): - super().test_le() - - @unittest.skip("TODO(SPARK-43683): Fix NullOps.lt to work with Spark Connect Column.") - def test_lt(self): - super().test_lt() - @unittest.skip("TODO(SPARK-43685): Fix NullOps.ne to work with Spark Connect Column.") def test_ne(self): super().test_ne() --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org