This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 462d4565cd2 [SPARK-43111][PS][CONNECT][PYTHON] Merge nested `if` statements into single `if` statements
462d4565cd2 is described below

commit 462d4565cd2782fe805c9871eeab2d969c79369f
Author: Bjørn Jørgensen <bjornjorgen...@gmail.com>
AuthorDate: Tue Apr 18 13:13:10 2023 +0900

    [SPARK-43111][PS][CONNECT][PYTHON] Merge nested `if` statements into single `if` statements
    
    ### What changes were proposed in this pull request?
    This PR aims to simplify the code by merging nested `if` statements into single `if` statements using the `and` operator.
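    
    As a minimal illustration of the pattern (a hypothetical sketch, not code taken from this PR), a nested check and its merged form look like:
    
    ```python
    def check_path_nested(path):
        # Before: nested `if` statements (illustrative example only)
        if path is not None:
            if len(path.strip()) == 0:
                raise ValueError("path must be a non-empty string")
    
    def check_path_merged(path):
        # After: the same check merged into a single `if` using `and`
        if path is not None and len(path.strip()) == 0:
            raise ValueError("path must be a non-empty string")
    ```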
    
    There are 7 of these according to [Sonarcloud](https://sonarcloud.io/project/issues?languages=py&resolved=false&rules=python%3AS1066&id=spark-python&open=AYQdnXXBRrJbVxW9ZDpw), and this PR fixes them all.
    
    ### Why are the changes needed?
    The changes do not affect the functionality of the code, but they improve readability and maintainability.
    
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    ### How was this patch tested?
    Pass GA.
    
    Closes #40759 from bjornjorgensen/Merge-if-with-the-enclosing-one.
    
    Lead-authored-by: Bjørn Jørgensen <bjornjorgen...@gmail.com>
    Co-authored-by: bjornjorgensen <bjornjorgen...@gmail.com>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 python/pyspark/accumulators.py                     |  5 ++---
 python/pyspark/pandas/frame.py                     | 17 ++++++++++-------
 python/pyspark/pandas/groupby.py                   | 17 ++++++++---------
 python/pyspark/pandas/indexes/base.py              |  9 ++++-----
 python/pyspark/pandas/namespace.py                 |  5 ++---
 python/pyspark/sql/connect/streaming/readwriter.py | 11 +++++------
 6 files changed, 31 insertions(+), 33 deletions(-)

diff --git a/python/pyspark/accumulators.py b/python/pyspark/accumulators.py
index fe775a37ed8..ce4bb561814 100644
--- a/python/pyspark/accumulators.py
+++ b/python/pyspark/accumulators.py
@@ -249,9 +249,8 @@ class _UpdateRequestHandler(SocketServer.StreamRequestHandler):
             while not self.server.server_shutdown:  # type: ignore[attr-defined]
                 # Poll every 1 second for new data -- don't block in case of shutdown.
                 r, _, _ = select.select([self.rfile], [], [], 1)
-                if self.rfile in r:
-                    if func():
-                        break
+                if self.rfile in r and func():
+                    break
 
         def accum_updates() -> bool:
             num_updates = read_int(self.rfile)
diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py
index 8bddcb6bae8..d1c10223432 100644
--- a/python/pyspark/pandas/frame.py
+++ b/python/pyspark/pandas/frame.py
@@ -8915,15 +8915,19 @@ defaultdict(<class 'list'>, {'col..., 'col...})]
             if len(index_scols) != other._internal.index_level:
                 raise ValueError("Both DataFrames have to have the same number 
of index levels")
 
-            if verify_integrity and len(index_scols) > 0:
-                if (
+            if (
+                verify_integrity
+                and len(index_scols) > 0
+                and (
                     self._internal.spark_frame.select(index_scols)
                     .intersect(
                         other._internal.spark_frame.select(other._internal.index_spark_columns)
                     )
                     .count()
-                ) > 0:
-                    raise ValueError("Indices have overlapping values")
+                )
+                > 0
+            ):
+                raise ValueError("Indices have overlapping values")
 
         # Lazy import to avoid circular dependency issues
         from pyspark.pandas.namespace import concat
@@ -11581,9 +11585,8 @@ defaultdict(<class 'list'>, {'col..., 'col...})]
 
             index_columns = psdf._internal.index_spark_column_names
             num_indices = len(index_columns)
-            if level:
-                if level < 0 or level >= num_indices:
-                    raise ValueError("level should be an integer between [0, %s)" % num_indices)
+            if level is not None and (level < 0 or level >= num_indices):
+                raise ValueError("level should be an integer between [0, %s)" % num_indices)
 
             @pandas_udf(returnType=index_mapper_ret_stype)  # type: ignore[call-overload]
             def index_mapper_udf(s: pd.Series) -> pd.Series:
diff --git a/python/pyspark/pandas/groupby.py b/python/pyspark/pandas/groupby.py
index 01687c3fd16..01bc72cd809 100644
--- a/python/pyspark/pandas/groupby.py
+++ b/python/pyspark/pandas/groupby.py
@@ -3550,15 +3550,14 @@ class GroupBy(Generic[FrameLike], metaclass=ABCMeta):
             if isinstance(self, SeriesGroupBy):
                 raise TypeError("Only numeric aggregation column is accepted.")
 
-            if not numeric_only:
-                if has_non_numeric:
-                    warnings.warn(
-                        "Dropping invalid columns in DataFrameGroupBy.%s is deprecated. "
-                        "In a future version, a TypeError will be raised. "
-                        "Before calling .%s, select only columns which should be "
-                        "valid for the function." % (function_name, function_name),
-                        FutureWarning,
-                    )
+            if not numeric_only and has_non_numeric:
+                warnings.warn(
+                    "Dropping invalid columns in DataFrameGroupBy.%s is deprecated. "
+                    "In a future version, a TypeError will be raised. "
+                    "Before calling .%s, select only columns which should be "
+                    "valid for the function." % (function_name, function_name),
+                    FutureWarning,
+                )
 
     def _reduce_for_stat_function(
         self,
diff --git a/python/pyspark/pandas/indexes/base.py b/python/pyspark/pandas/indexes/base.py
index 4e8de350998..e38cf267720 100644
--- a/python/pyspark/pandas/indexes/base.py
+++ b/python/pyspark/pandas/indexes/base.py
@@ -2095,11 +2095,10 @@ class Index(IndexOpsMixin):
         """
         from pyspark.pandas.indexes.multi import MultiIndex
 
-        if isinstance(self, MultiIndex):
-            if level is not None:
-                self_names = self.names
-                self_names[level] = names  # type: ignore[index]
-                names = self_names
+        if isinstance(self, MultiIndex) and level is not None:
+            self_names = self.names
+            self_names[level] = names  # type: ignore[index]
+            names = self_names
         return self.rename(name=names, inplace=inplace)
 
     def difference(self, other: "Index", sort: Optional[bool] = None) -> "Index":
diff --git a/python/pyspark/pandas/namespace.py b/python/pyspark/pandas/namespace.py
index fdd9f86e402..b0a6073b813 100644
--- a/python/pyspark/pandas/namespace.py
+++ b/python/pyspark/pandas/namespace.py
@@ -2190,9 +2190,8 @@ def get_dummies(
     if sparse is not False:
         raise NotImplementedError("get_dummies currently does not support sparse")
 
-    if columns is not None:
-        if not is_list_like(columns):
-            raise TypeError("Input must be a list-like for parameter `columns`")
+    if columns is not None and not is_list_like(columns):
+        raise TypeError("Input must be a list-like for parameter `columns`")
 
     if dtype is None:
         dtype = "byte"
diff --git a/python/pyspark/sql/connect/streaming/readwriter.py b/python/pyspark/sql/connect/streaming/readwriter.py
index eb78c374965..0775c1ab4a3 100644
--- a/python/pyspark/sql/connect/streaming/readwriter.py
+++ b/python/pyspark/sql/connect/streaming/readwriter.py
@@ -94,12 +94,11 @@ class DataStreamReader(OptionUtils):
         if schema is not None:
             self.schema(schema)
         self.options(**options)
-        if path is not None:
-            if type(path) != str or len(path.strip()) == 0:
-                raise ValueError(
-                    "If the path is provided for stream, it needs to be a "
-                    + "non-empty string. List of paths are not supported."
-                )
+        if path is not None and (type(path) != str or len(path.strip()) == 0):
+            raise ValueError(
+                "If the path is provided for stream, it needs to be a "
+                + "non-empty string. List of paths are not supported."
+            )
 
         plan = DataSource(
             format=self._format,

