This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new d96ab44c8251 [SPARK-48393][PYTHON] Move a group of constants to `pyspark.util`
d96ab44c8251 is described below

commit d96ab44c82519eec88b28df6974ddb5b7f429dbf
Author: Ruifeng Zheng <ruife...@apache.org>
AuthorDate: Thu May 23 09:34:42 2024 +0900

    [SPARK-48393][PYTHON] Move a group of constants to `pyspark.util`
    
    ### What changes were proposed in this pull request?
    Move a group of constants from `pyspark.sql.connect.types` to `pyspark.util`, so they are reusable in both Spark Connect and classic PySpark.
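    
    For illustration, a minimal, hypothetical sketch (not part of this patch) of how code on either code path can consume the relocated constants after this change; the `fits_in_long` helper is an assumption for the example, not an existing API:
    
    ```python
    # Hypothetical usage sketch: after this change both classic PySpark and
    # Spark Connect modules can import the JVM integral bounds from
    # pyspark.util instead of pyspark.sql.connect.types.
    from pyspark.util import JVM_LONG_MIN, JVM_LONG_MAX
    
    
    def fits_in_long(value: int) -> bool:
        """Return True if `value` fits in a signed 64-bit JVM long."""
        return JVM_LONG_MIN <= value <= JVM_LONG_MAX
    
    
    if __name__ == "__main__":
        print(fits_in_long(2**62))  # True
        print(fits_in_long(2**63))  # False: exceeds JVM_LONG_MAX
    ```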
    
    ### Why are the changes needed?
    Code cleanup.
    
    ### Does this PR introduce _any_ user-facing change?
    No, they are internal constants.
    
    ### How was this patch tested?
    CI.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #46710 from zhengruifeng/unity_constant.
    
    Authored-by: Ruifeng Zheng <ruife...@apache.org>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 python/pyspark/sql/connect/expressions.py               |  4 +++-
 python/pyspark/sql/connect/types.py                     | 10 ----------
 python/pyspark/sql/connect/window.py                    |  2 +-
 python/pyspark/sql/tests/connect/test_connect_column.py |  2 +-
 python/pyspark/sql/types.py                             | 16 +++++++---------
 python/pyspark/sql/utils.py                             |  9 +++------
 python/pyspark/util.py                                  | 10 ++++++++++
 7 files changed, 25 insertions(+), 28 deletions(-)

diff --git a/python/pyspark/sql/connect/expressions.py b/python/pyspark/sql/connect/expressions.py
index 4dc54793ed81..8cd386ba03ae 100644
--- a/python/pyspark/sql/connect/expressions.py
+++ b/python/pyspark/sql/connect/expressions.py
@@ -61,7 +61,7 @@ from pyspark.sql.types import (
 )
 
 import pyspark.sql.connect.proto as proto
-from pyspark.sql.connect.types import (
+from pyspark.util import (
     JVM_BYTE_MIN,
     JVM_BYTE_MAX,
     JVM_SHORT_MIN,
@@ -70,6 +70,8 @@ from pyspark.sql.connect.types import (
     JVM_INT_MAX,
     JVM_LONG_MIN,
     JVM_LONG_MAX,
+)
+from pyspark.sql.connect.types import (
     UnparsedDataType,
     pyspark_types_to_proto_types,
     proto_schema_to_pyspark_data_type,
diff --git a/python/pyspark/sql/connect/types.py b/python/pyspark/sql/connect/types.py
index f058c6390612..351fa0165965 100644
--- a/python/pyspark/sql/connect/types.py
+++ b/python/pyspark/sql/connect/types.py
@@ -55,16 +55,6 @@ from pyspark.errors import PySparkAssertionError, PySparkValueError
 import pyspark.sql.connect.proto as pb2
 
 
-JVM_BYTE_MIN: int = -(1 << 7)
-JVM_BYTE_MAX: int = (1 << 7) - 1
-JVM_SHORT_MIN: int = -(1 << 15)
-JVM_SHORT_MAX: int = (1 << 15) - 1
-JVM_INT_MIN: int = -(1 << 31)
-JVM_INT_MAX: int = (1 << 31) - 1
-JVM_LONG_MIN: int = -(1 << 63)
-JVM_LONG_MAX: int = (1 << 63) - 1
-
-
 class UnparsedDataType(DataType):
     """
     Unparsed data type.
diff --git a/python/pyspark/sql/connect/window.py b/python/pyspark/sql/connect/window.py
index 04cf4c91d320..6fc1a1fac1e3 100644
--- a/python/pyspark/sql/connect/window.py
+++ b/python/pyspark/sql/connect/window.py
@@ -27,7 +27,7 @@ from pyspark.sql.connect.expressions import (
     Expression,
     SortOrder,
 )
-from pyspark.sql.connect.types import (
+from pyspark.util import (
     JVM_LONG_MIN,
     JVM_LONG_MAX,
 )
diff --git a/python/pyspark/sql/tests/connect/test_connect_column.py b/python/pyspark/sql/tests/connect/test_connect_column.py
index a9e3adb972e9..9a850dcae6f5 100644
--- a/python/pyspark/sql/tests/connect/test_connect_column.py
+++ b/python/pyspark/sql/tests/connect/test_connect_column.py
@@ -51,7 +51,7 @@ if should_test_connect:
     from pyspark.sql.connect import functions as CF
     from pyspark.sql.connect.column import Column
     from pyspark.sql.connect.expressions import DistributedSequenceID, LiteralExpression
-    from pyspark.sql.connect.types import (
+    from pyspark.util import (
         JVM_BYTE_MIN,
         JVM_BYTE_MAX,
         JVM_SHORT_MIN,
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index fa98d09a9af9..ee0cc9db5c44 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -45,7 +45,7 @@ from typing import (
     TYPE_CHECKING,
 )
 
-from pyspark.util import is_remote_only
+from pyspark.util import is_remote_only, JVM_INT_MAX
 from pyspark.serializers import CloudPickleSerializer
 from pyspark.sql.utils import (
     has_numpy,
@@ -104,8 +104,6 @@ __all__ = [
     "VariantVal",
 ]
 
-_JVM_INT_MAX: int = (1 << 31) - 1
-
 
 class DataType:
     """Base class for data types."""
@@ -756,7 +754,7 @@ class ArrayType(DataType):
         self,
         prefix: str,
         stringConcat: StringConcat,
-        maxDepth: int = _JVM_INT_MAX,
+        maxDepth: int = JVM_INT_MAX,
     ) -> None:
         if maxDepth > 0:
             stringConcat.append(
@@ -905,7 +903,7 @@ class MapType(DataType):
         self,
         prefix: str,
         stringConcat: StringConcat,
-        maxDepth: int = _JVM_INT_MAX,
+        maxDepth: int = JVM_INT_MAX,
     ) -> None:
         if maxDepth > 0:
             stringConcat.append(f"{prefix}-- key: {self.keyType.typeName()}\n")
@@ -1072,7 +1070,7 @@ class StructField(DataType):
         self,
         prefix: str,
         stringConcat: StringConcat,
-        maxDepth: int = _JVM_INT_MAX,
+        maxDepth: int = JVM_INT_MAX,
     ) -> None:
         if maxDepth > 0:
             stringConcat.append(
@@ -1507,16 +1505,16 @@ class StructType(DataType):
         self,
         prefix: str,
         stringConcat: StringConcat,
-        maxDepth: int = _JVM_INT_MAX,
+        maxDepth: int = JVM_INT_MAX,
     ) -> None:
         for field in self.fields:
             field._build_formatted_string(prefix, stringConcat, maxDepth)
 
-    def treeString(self, maxDepth: int = _JVM_INT_MAX) -> str:
+    def treeString(self, maxDepth: int = JVM_INT_MAX) -> str:
         stringConcat = StringConcat()
         stringConcat.append("root\n")
         prefix = " |"
-        depth = maxDepth if maxDepth > 0 else _JVM_INT_MAX
+        depth = maxDepth if maxDepth > 0 else JVM_INT_MAX
         for field in self.fields:
             field._build_formatted_string(prefix, stringConcat, depth)
         return stringConcat.toString()
diff --git a/python/pyspark/sql/utils.py b/python/pyspark/sql/utils.py
index 171f92e557a1..33e01ba378c4 100644
--- a/python/pyspark/sql/utils.py
+++ b/python/pyspark/sql/utils.py
@@ -43,7 +43,7 @@ from pyspark.errors import (  # noqa: F401
     PySparkNotImplementedError,
     PySparkRuntimeError,
 )
-from pyspark.util import is_remote_only
+from pyspark.util import is_remote_only, JVM_INT_MAX
 from pyspark.errors.exceptions.captured import CapturedException  # noqa: F401
 from pyspark.find_spark_home import _find_spark_home
 
@@ -136,11 +136,8 @@ class ForeachBatchFunction:
 
 
 # Python implementation of 'org.apache.spark.sql.catalyst.util.StringConcat'
-_MAX_ROUNDED_ARRAY_LENGTH = (1 << 31) - 1 - 15
-
-
 class StringConcat:
-    def __init__(self, maxLength: int = _MAX_ROUNDED_ARRAY_LENGTH):
+    def __init__(self, maxLength: int = JVM_INT_MAX - 15):
         self.maxLength: int = maxLength
         self.strings: List[str] = []
         self.length: int = 0
@@ -156,7 +153,7 @@ class StringConcat:
                 stringToAppend = s if available >= sLen else s[0:available]
                 self.strings.append(stringToAppend)
 
-            self.length = min(self.length + sLen, _MAX_ROUNDED_ARRAY_LENGTH)
+            self.length = min(self.length + sLen, JVM_INT_MAX - 15)
 
     def toString(self) -> str:
         # finalLength = self.maxLength if self.atLimit()  else self.length
diff --git a/python/pyspark/util.py b/python/pyspark/util.py
index 4920ba957c19..49766913e6ee 100644
--- a/python/pyspark/util.py
+++ b/python/pyspark/util.py
@@ -71,6 +71,16 @@ if typing.TYPE_CHECKING:
     from pyspark.sql import SparkSession
 
 
+JVM_BYTE_MIN: int = -(1 << 7)
+JVM_BYTE_MAX: int = (1 << 7) - 1
+JVM_SHORT_MIN: int = -(1 << 15)
+JVM_SHORT_MAX: int = (1 << 15) - 1
+JVM_INT_MIN: int = -(1 << 31)
+JVM_INT_MAX: int = (1 << 31) - 1
+JVM_LONG_MIN: int = -(1 << 63)
+JVM_LONG_MAX: int = (1 << 63) - 1
+
+
 def print_exec(stream: TextIO) -> None:
     ei = sys.exc_info()
     traceback.print_exception(ei[0], ei[1], ei[2], None, stream)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
