This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new d96ab44c8251 [SPARK-48393][PYTHON] Move a group of constants to `pyspark.util`
d96ab44c8251 is described below

commit d96ab44c82519eec88b28df6974ddb5b7f429dbf
Author: Ruifeng Zheng <ruife...@apache.org>
AuthorDate: Thu May 23 09:34:42 2024 +0900

    [SPARK-48393][PYTHON] Move a group of constants to `pyspark.util`

    ### What changes were proposed in this pull request?
    Move a group of constants from `pyspark.sql.connect.types` to `pyspark.util`, so they can be reused by both Spark Connect and classic PySpark.

    ### Why are the changes needed?
    Code cleanup.

    ### Does this PR introduce _any_ user-facing change?
    No, they are internal constants.

    ### How was this patch tested?
    CI.

    ### Was this patch authored or co-authored using generative AI tooling?
    No.

    Closes #46710 from zhengruifeng/unity_constant.

    Authored-by: Ruifeng Zheng <ruife...@apache.org>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 python/pyspark/sql/connect/expressions.py               |  4 +++-
 python/pyspark/sql/connect/types.py                     | 10 ----------
 python/pyspark/sql/connect/window.py                    |  2 +-
 python/pyspark/sql/tests/connect/test_connect_column.py |  2 +-
 python/pyspark/sql/types.py                             | 16 +++++++---------
 python/pyspark/sql/utils.py                             |  9 +++------
 python/pyspark/util.py                                  | 10 ++++++++++
 7 files changed, 25 insertions(+), 28 deletions(-)

diff --git a/python/pyspark/sql/connect/expressions.py b/python/pyspark/sql/connect/expressions.py
index 4dc54793ed81..8cd386ba03ae 100644
--- a/python/pyspark/sql/connect/expressions.py
+++ b/python/pyspark/sql/connect/expressions.py
@@ -61,7 +61,7 @@ from pyspark.sql.types import (
 )
 
 import pyspark.sql.connect.proto as proto
-from pyspark.sql.connect.types import (
+from pyspark.util import (
     JVM_BYTE_MIN,
     JVM_BYTE_MAX,
     JVM_SHORT_MIN,
@@ -70,6 +70,8 @@ from pyspark.sql.connect.types import (
     JVM_INT_MAX,
     JVM_LONG_MIN,
     JVM_LONG_MAX,
+)
+from pyspark.sql.connect.types import (
     UnparsedDataType,
     pyspark_types_to_proto_types,
     proto_schema_to_pyspark_data_type,
diff --git a/python/pyspark/sql/connect/types.py b/python/pyspark/sql/connect/types.py
index f058c6390612..351fa0165965 100644
--- a/python/pyspark/sql/connect/types.py
+++ b/python/pyspark/sql/connect/types.py
@@ -55,16 +55,6 @@ from pyspark.errors import PySparkAssertionError, PySparkValueError
 
 import pyspark.sql.connect.proto as pb2
 
-JVM_BYTE_MIN: int = -(1 << 7)
-JVM_BYTE_MAX: int = (1 << 7) - 1
-JVM_SHORT_MIN: int = -(1 << 15)
-JVM_SHORT_MAX: int = (1 << 15) - 1
-JVM_INT_MIN: int = -(1 << 31)
-JVM_INT_MAX: int = (1 << 31) - 1
-JVM_LONG_MIN: int = -(1 << 63)
-JVM_LONG_MAX: int = (1 << 63) - 1
-
-
 class UnparsedDataType(DataType):
     """
     Unparsed data type.
diff --git a/python/pyspark/sql/connect/window.py b/python/pyspark/sql/connect/window.py
index 04cf4c91d320..6fc1a1fac1e3 100644
--- a/python/pyspark/sql/connect/window.py
+++ b/python/pyspark/sql/connect/window.py
@@ -27,7 +27,7 @@ from pyspark.sql.connect.expressions import (
     Expression,
     SortOrder,
 )
-from pyspark.sql.connect.types import (
+from pyspark.util import (
     JVM_LONG_MIN,
     JVM_LONG_MAX,
 )
diff --git a/python/pyspark/sql/tests/connect/test_connect_column.py b/python/pyspark/sql/tests/connect/test_connect_column.py
index a9e3adb972e9..9a850dcae6f5 100644
--- a/python/pyspark/sql/tests/connect/test_connect_column.py
+++ b/python/pyspark/sql/tests/connect/test_connect_column.py
@@ -51,7 +51,7 @@ if should_test_connect:
     from pyspark.sql.connect import functions as CF
     from pyspark.sql.connect.column import Column
     from pyspark.sql.connect.expressions import DistributedSequenceID, LiteralExpression
-    from pyspark.sql.connect.types import (
+    from pyspark.util import (
         JVM_BYTE_MIN,
         JVM_BYTE_MAX,
         JVM_SHORT_MIN,
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index fa98d09a9af9..ee0cc9db5c44 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -45,7 +45,7 @@ from typing import (
     TYPE_CHECKING,
 )
 
-from pyspark.util import is_remote_only
+from pyspark.util import is_remote_only, JVM_INT_MAX
 from pyspark.serializers import CloudPickleSerializer
 from pyspark.sql.utils import (
     has_numpy,
@@ -104,8 +104,6 @@ __all__ = [
     "VariantVal",
 ]
 
-_JVM_INT_MAX: int = (1 << 31) - 1
-
 
 class DataType:
     """Base class for data types."""
@@ -756,7 +754,7 @@ class ArrayType(DataType):
         self,
         prefix: str,
         stringConcat: StringConcat,
-        maxDepth: int = _JVM_INT_MAX,
+        maxDepth: int = JVM_INT_MAX,
     ) -> None:
         if maxDepth > 0:
             stringConcat.append(
@@ -905,7 +903,7 @@ class MapType(DataType):
         self,
         prefix: str,
         stringConcat: StringConcat,
-        maxDepth: int = _JVM_INT_MAX,
+        maxDepth: int = JVM_INT_MAX,
     ) -> None:
         if maxDepth > 0:
             stringConcat.append(f"{prefix}-- key: {self.keyType.typeName()}\n")
@@ -1072,7 +1070,7 @@ class StructField(DataType):
         self,
         prefix: str,
         stringConcat: StringConcat,
-        maxDepth: int = _JVM_INT_MAX,
+        maxDepth: int = JVM_INT_MAX,
     ) -> None:
         if maxDepth > 0:
             stringConcat.append(
@@ -1507,16 +1505,16 @@ class StructType(DataType):
         self,
         prefix: str,
         stringConcat: StringConcat,
-        maxDepth: int = _JVM_INT_MAX,
+        maxDepth: int = JVM_INT_MAX,
     ) -> None:
         for field in self.fields:
             field._build_formatted_string(prefix, stringConcat, maxDepth)
 
-    def treeString(self, maxDepth: int = _JVM_INT_MAX) -> str:
+    def treeString(self, maxDepth: int = JVM_INT_MAX) -> str:
         stringConcat = StringConcat()
         stringConcat.append("root\n")
         prefix = " |"
-        depth = maxDepth if maxDepth > 0 else _JVM_INT_MAX
+        depth = maxDepth if maxDepth > 0 else JVM_INT_MAX
         for field in self.fields:
             field._build_formatted_string(prefix, stringConcat, depth)
         return stringConcat.toString()
diff --git a/python/pyspark/sql/utils.py b/python/pyspark/sql/utils.py
index 171f92e557a1..33e01ba378c4 100644
--- a/python/pyspark/sql/utils.py
+++ b/python/pyspark/sql/utils.py
@@ -43,7 +43,7 @@ from pyspark.errors import (  # noqa: F401
     PySparkNotImplementedError,
     PySparkRuntimeError,
 )
-from pyspark.util import is_remote_only
+from pyspark.util import is_remote_only, JVM_INT_MAX
 from pyspark.errors.exceptions.captured import CapturedException  # noqa: F401
 from pyspark.find_spark_home import _find_spark_home
@@ -136,11 +136,8 @@ class ForeachBatchFunction:
 
 
 # Python implementation of 'org.apache.spark.sql.catalyst.util.StringConcat'
-_MAX_ROUNDED_ARRAY_LENGTH = (1 << 31) - 1 - 15
-
-
 class StringConcat:
-    def __init__(self, maxLength: int = _MAX_ROUNDED_ARRAY_LENGTH):
+    def __init__(self, maxLength: int = JVM_INT_MAX - 15):
         self.maxLength: int = maxLength
         self.strings: List[str] = []
         self.length: int = 0
@@ -156,7 +153,7 @@ class StringConcat:
             stringToAppend = s if available >= sLen else s[0:available]
             self.strings.append(stringToAppend)
 
-            self.length = min(self.length + sLen, _MAX_ROUNDED_ARRAY_LENGTH)
+            self.length = min(self.length + sLen, JVM_INT_MAX - 15)
 
     def toString(self) -> str:
         # finalLength = self.maxLength if self.atLimit() else self.length
diff --git a/python/pyspark/util.py b/python/pyspark/util.py
index 4920ba957c19..49766913e6ee 100644
--- a/python/pyspark/util.py
+++ b/python/pyspark/util.py
@@ -71,6 +71,16 @@ if typing.TYPE_CHECKING:
     from pyspark.sql import SparkSession
 
 
+JVM_BYTE_MIN: int = -(1 << 7)
+JVM_BYTE_MAX: int = (1 << 7) - 1
+JVM_SHORT_MIN: int = -(1 << 15)
+JVM_SHORT_MAX: int = (1 << 15) - 1
+JVM_INT_MIN: int = -(1 << 31)
+JVM_INT_MAX: int = (1 << 31) - 1
+JVM_LONG_MIN: int = -(1 << 63)
+JVM_LONG_MAX: int = (1 << 63) - 1
+
+
 def print_exec(stream: TextIO) -> None:
     ei = sys.exc_info()
     traceback.print_exception(ei[0], ei[1], ei[2], None, stream)

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
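
Usage note (not part of the patch above): after this change, the JVM numeric bounds are importable from `pyspark.util` by both classic PySpark and Spark Connect code. Below is a minimal sketch, assuming a build that includes this commit; the fits_in_jvm_int / fits_in_jvm_long helpers are hypothetical names introduced only for illustration.

# Hypothetical helpers: range-check Python ints against the JVM bounds
# that pyspark.util exposes after SPARK-48393.
from pyspark.util import JVM_INT_MIN, JVM_INT_MAX, JVM_LONG_MIN, JVM_LONG_MAX


def fits_in_jvm_int(value: int) -> bool:
    # True if `value` fits in a signed 32-bit JVM int.
    return JVM_INT_MIN <= value <= JVM_INT_MAX


def fits_in_jvm_long(value: int) -> bool:
    # True if `value` fits in a signed 64-bit JVM long.
    return JVM_LONG_MIN <= value <= JVM_LONG_MAX


assert fits_in_jvm_int((1 << 31) - 1)   # equals JVM_INT_MAX
assert not fits_in_jvm_int(1 << 31)     # one past JVM_INT_MAX
assert fits_in_jvm_long(-(1 << 63))     # equals JVM_LONG_MIN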