This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-3.2 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.2 by this push: new 139536c [SPARK-36142][PYTHON] Follow Pandas when pow between fractional series with Na and bool literal 139536c is described below commit 139536c3ed91c9bfd93e169feeec1fe9dd220aca Author: Yikun Jiang <yikunk...@gmail.com> AuthorDate: Tue Jul 27 12:06:05 2021 +0900 [SPARK-36142][PYTHON] Follow Pandas when pow between fractional series with Na and bool literal ### What changes were proposed in this pull request? Set the result to 1 when the exponent is 0 (or False). ### Why are the changes needed? Currently, exponentiation between fractional series and bools is not consistent with pandas' behavior. ``` >>> pser = pd.Series([1, 2, np.nan], dtype=float) >>> psser = ps.from_pandas(pser) >>> pser ** False 0 1.0 1 1.0 2 1.0 dtype: float64 >>> psser ** False 0 1.0 1 1.0 2 NaN dtype: float64 ``` We ought to adjust that. See more in [SPARK-36142](https://issues.apache.org/jira/browse/SPARK-36142) ### Does this PR introduce _any_ user-facing change? Yes, it introduces a user-facing change, resulting in a different result for pow between fractional Series with missing values and bool literal; the results now follow pandas behavior. ### How was this patch tested? - Add test_pow_with_float_nan unit test - Existing test in test_pow Closes #33521 from Yikun/SPARK-36142. 
Authored-by: Yikun Jiang <yikunk...@gmail.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> (cherry picked from commit d52c2de08b60930a129825d15e8f822c07e8bd31) Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- python/pyspark/pandas/data_type_ops/num_ops.py | 6 +++++- python/pyspark/pandas/tests/data_type_ops/test_num_ops.py | 12 ++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/python/pyspark/pandas/data_type_ops/num_ops.py b/python/pyspark/pandas/data_type_ops/num_ops.py index 8de46cd..43c3b01 100644 --- a/python/pyspark/pandas/data_type_ops/num_ops.py +++ b/python/pyspark/pandas/data_type_ops/num_ops.py @@ -79,7 +79,11 @@ class NumericOps(DataTypeOps): raise TypeError("Exponentiation can not be applied to given types.") def pow_func(left: Column, right: Any) -> Column: - return F.when(left == 1, left).otherwise(Column.__pow__(left, right)) + return ( + F.when(left == 1, left) + .when(SF.lit(right) == 0, 1) + .otherwise(Column.__pow__(left, right)) + ) right = transform_boolean_operand_to_numeric(right, spark_type=left.spark.data_type) return column_op(pow_func)(left, right) diff --git a/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py b/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py index fb2d2be..0dd3501 100644 --- a/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py +++ b/python/pyspark/pandas/tests/data_type_ops/test_num_ops.py @@ -183,6 +183,18 @@ class NumOpsTest(PandasOnSparkTestCase, TestCasesUtils): else: self.assertRaises(TypeError, lambda: psser ** psdf[n_col]) + # TODO(SPARK-36031): Merge test_pow_with_nan into test_pow + def test_pow_with_float_nan(self): + for col in self.numeric_w_nan_df_cols: + if col == "float_w_nan": + pser, psser = self.numeric_w_nan_pdf[col], self.numeric_w_nan_psdf[col] + self.assert_eq(pser ** pser, psser ** psser) + self.assert_eq(pser ** pser.astype(bool), psser ** psser.astype(bool)) + self.assert_eq(pser ** True, psser ** True) + self.assert_eq(pser ** False, 
psser ** False) + self.assert_eq(pser ** 1, psser ** 1) + self.assert_eq(pser ** 0, psser ** 0) + def test_radd(self): pdf, psdf = self.pdf, self.psdf for col in self.numeric_df_cols: --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org