This is an automated email from the ASF dual-hosted git repository.

sarutak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new 8687138  Revert "[SPARK-36894][SPARK-37077][PYTHON] Synchronize RDD.toDF annotations with SparkSession and SQLContext .createDataFrame variants."
8687138 is described below

commit 86871386b063d8f7a8b5b42eb327a3900525af58
Author: Kousuke Saruta <saru...@oss.nttdata.com>
AuthorDate: Wed Nov 3 23:01:38 2021 +0900

    Revert "[SPARK-36894][SPARK-37077][PYTHON] Synchronize RDD.toDF annotations with SparkSession and SQLContext .createDataFrame variants."

    This reverts commit 855da09f02f3007a2c36e7a738d4dc81fd95569a.
    See [this comment](https://github.com/apache/spark/pull/34146#issuecomment-959136935).

    Closes #34477 from sarutak/revert-SPARK-37077.

    Authored-by: Kousuke Saruta <saru...@oss.nttdata.com>
    Signed-off-by: Kousuke Saruta <saru...@oss.nttdata.com>
---
 python/pyspark/rdd.pyi         | 15 ++++-----------
 python/pyspark/sql/_typing.pyi | 11 -----------
 python/pyspark/sql/context.py  | 38 ++++++++++++++++++++++----------------
 python/pyspark/sql/session.py  | 40 +++++++++++-----------------------------
 4 files changed, 37 insertions(+), 67 deletions(-)

diff --git a/python/pyspark/rdd.pyi b/python/pyspark/rdd.pyi
index 84481d3..a810a2c 100644
--- a/python/pyspark/rdd.pyi
+++ b/python/pyspark/rdd.pyi
@@ -55,8 +55,8 @@ from pyspark.resource.requests import (  # noqa: F401
 from pyspark.resource.profile import ResourceProfile
 from pyspark.statcounter import StatCounter
 from pyspark.sql.dataframe import DataFrame
-from pyspark.sql.types import AtomicType, StructType
-from pyspark.sql._typing import AtomicValue, RowLike
+from pyspark.sql.types import StructType
+from pyspark.sql._typing import RowLike
 from py4j.java_gateway import JavaObject  # type: ignore[import]
 
 T = TypeVar("T")
@@ -445,18 +445,11 @@ class RDD(Generic[T]):
     @overload
     def toDF(
         self: RDD[RowLike],
-        schema: Optional[Union[List[str], Tuple[str, ...]]] = ...,
+        schema: Optional[List[str]] = ...,
         sampleRatio: Optional[float] = ...,
     ) -> DataFrame: ...
     @overload
-    def toDF(
-        self: RDD[RowLike], schema: Optional[Union[StructType, str]] = ...
-    ) -> DataFrame: ...
-    @overload
-    def toDF(
-        self: RDD[AtomicValue],
-        schema: Union[AtomicType, str],
-    ) -> DataFrame: ...
+    def toDF(self: RDD[RowLike], schema: Optional[StructType] = ...) -> DataFrame: ...
 
 class RDDBarrier(Generic[T]):
     rdd: RDD[T]
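For readers skimming the stub changes above, a minimal sketch of the toDF call shapes the remaining overloads describe. Nothing below is part of the commit; the SparkSession setup and variable names are assumptions made for the example.

    # Illustrative only -- exercises the RDD.toDF overloads kept by this revert.
    from pyspark.sql import SparkSession
    from pyspark.sql.types import StructType, StructField, StringType, LongType

    spark = SparkSession.builder.master("local[1]").appName("toDF-sketch").getOrCreate()
    sc = spark.sparkContext

    rows = sc.parallelize([("Alice", 1), ("Bob", 2)])

    # First overload: row-like RDD with a list-of-column-names schema.
    df1 = rows.toDF(["name", "id"])

    # Second overload: row-like RDD with an explicit StructType.
    schema = StructType([
        StructField("name", StringType()),
        StructField("id", LongType()),
    ])
    df2 = rows.toDF(schema)

    # The removed third overload covered atomic-value RDDs, e.g.
    # sc.parallelize([1, 2, 3]).toDF(IntegerType()); that still runs,
    # but rdd.pyi no longer declares a dedicated signature for it.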
diff --git a/python/pyspark/sql/_typing.pyi b/python/pyspark/sql/_typing.pyi
index b6b4606..1a3bd8f 100644
--- a/python/pyspark/sql/_typing.pyi
+++ b/python/pyspark/sql/_typing.pyi
@@ -42,17 +42,6 @@ AtomicDataTypeOrString = Union[pyspark.sql.types.AtomicType, str]
 DataTypeOrString = Union[pyspark.sql.types.DataType, str]
 OptionalPrimitiveType = Optional[PrimitiveType]
 
-AtomicValue = TypeVar(
-    "AtomicValue",
-    datetime.datetime,
-    datetime.date,
-    decimal.Decimal,
-    bool,
-    str,
-    int,
-    float,
-)
-
 RowLike = TypeVar("RowLike", List[Any], Tuple[Any, ...], pyspark.sql.types.Row)
 
 class SupportsOpen(Protocol):
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index eba2087..7d27c55 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -48,11 +48,13 @@ from pyspark.conf import SparkConf
 
 if TYPE_CHECKING:
     from pyspark.sql._typing import (
-        AtomicValue,
-        RowLike,
         UserDefinedFunctionLike,
+        RowLike,
+        DateTimeLiteral,
+        LiteralType,
+        DecimalLiteral
     )
-    from pyspark.sql.pandas._typing import DataFrameLike as PandasDataFrameLike
+    from pyspark.sql.pandas._typing import DataFrameLike
 
 __all__ = ["SQLContext", "HiveContext"]
@@ -321,8 +323,7 @@ class SQLContext(object):
     @overload
     def createDataFrame(
         self,
-        data: Union["RDD[RowLike]", Iterable["RowLike"]],
-        schema: Union[List[str], Tuple[str, ...]] = ...,
+        data: Iterable["RowLike"],
         samplingRatio: Optional[float] = ...,
     ) -> DataFrame:
         ...
@@ -330,9 +331,8 @@ class SQLContext(object):
     @overload
     def createDataFrame(
         self,
-        data: Union["RDD[RowLike]", Iterable["RowLike"]],
-        schema: Union[StructType, str],
-        *,
+        data: Iterable["RowLike"],
+        schema: Union[List[str], Tuple[str, ...]] = ...,
         verifySchema: bool = ...,
     ) -> DataFrame:
         ...
@@ -340,10 +340,7 @@ class SQLContext(object):
     @overload
     def createDataFrame(
         self,
-        data: Union[
-            "RDD[AtomicValue]",
-            Iterable["AtomicValue"],
-        ],
+        data: Iterable[Union["DateTimeLiteral", "LiteralType", "DecimalLiteral"]],
         schema: Union[AtomicType, str],
         verifySchema: bool = ...,
     ) -> DataFrame:
@@ -351,14 +348,23 @@ class SQLContext(object):
 
     @overload
     def createDataFrame(
-        self, data: "PandasDataFrameLike", samplingRatio: Optional[float] = ...
+        self,
+        data: Iterable["RowLike"],
+        schema: Union[StructType, str],
+        verifySchema: bool = ...,
+    ) -> DataFrame:
+        ...
+
+    @overload
+    def createDataFrame(
+        self, data: "DataFrameLike", samplingRatio: Optional[float] = ...
     ) -> DataFrame:
         ...
 
     @overload
     def createDataFrame(
         self,
-        data: "PandasDataFrameLike",
+        data: "DataFrameLike",
         schema: Union[StructType, str],
         verifySchema: bool = ...,
     ) -> DataFrame:
@@ -366,8 +372,8 @@ class SQLContext(object):
 
     def createDataFrame(  # type: ignore[misc]
         self,
-        data: Union["RDD[Any]", Iterable[Any], "PandasDataFrameLike"],
-        schema: Optional[Union[AtomicType, StructType, str]] = None,
+        data: Iterable["RowLike"],
+        schema: Optional[Union[List[str], Tuple[str, ...]]] = None,
         samplingRatio: Optional[float] = None,
         verifySchema: bool = True
     ) -> DataFrame:
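Similarly, a short sketch of the createDataFrame call shapes the restored annotations in context.py and session.py describe. Again illustrative only, reusing the assumed spark, sc, and schema handles from the sketch above.

    # Illustrative only -- call shapes covered by the restored createDataFrame overloads.
    from pyspark.sql.types import IntegerType

    data = [("Alice", 1), ("Bob", 2)]

    # Row-like data (plain iterable or RDD) with a list-of-column-names schema.
    df1 = spark.createDataFrame(data, ["name", "id"])
    df2 = spark.createDataFrame(sc.parallelize(data), ["name", "id"])

    # Atomic values with an AtomicType schema -- the case the restored
    # DateTimeLiteral / LiteralType / DecimalLiteral annotations cover.
    df3 = spark.createDataFrame([1, 2, 3], IntegerType())

    # Row-like data with an explicit StructType.
    df4 = spark.createDataFrame(data, schema)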
diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py
index de0f9e3..728d658 100644
--- a/python/pyspark/sql/session.py
+++ b/python/pyspark/sql/session.py
@@ -42,7 +42,7 @@ from pyspark.sql.types import (
 from pyspark.sql.utils import install_exception_handler, is_timestamp_ntz_preferred
 
 if TYPE_CHECKING:
-    from pyspark.sql._typing import AtomicValue, RowLike
+    from pyspark.sql._typing import DateTimeLiteral, LiteralType, DecimalLiteral, RowLike
     from pyspark.sql.catalog import Catalog
     from pyspark.sql.pandas._typing import DataFrameLike as PandasDataFrameLike
     from pyspark.sql.streaming import StreamingQueryManager
@@ -628,8 +628,7 @@ class SparkSession(SparkConversionMixin):
     @overload
     def createDataFrame(
         self,
-        data: Iterable["RowLike"],
-        schema: Union[List[str], Tuple[str, ...]] = ...,
+        data: Union["RDD[RowLike]", Iterable["RowLike"]],
         samplingRatio: Optional[float] = ...,
     ) -> DataFrame:
         ...
@@ -637,36 +636,19 @@ class SparkSession(SparkConversionMixin):
     @overload
     def createDataFrame(
         self,
-        data: "RDD[RowLike]",
+        data: Union["RDD[RowLike]", Iterable["RowLike"]],
         schema: Union[List[str], Tuple[str, ...]] = ...,
-        samplingRatio: Optional[float] = ...,
-    ) -> DataFrame:
-        ...
-
-    @overload
-    def createDataFrame(
-        self,
-        data: Iterable["RowLike"],
-        schema: Union[StructType, str],
-        *,
         verifySchema: bool = ...,
     ) -> DataFrame:
         ...
 
     @overload
     def createDataFrame(
-        self,
-        data: "RDD[RowLike]",
-        schema: Union[StructType, str],
-        *,
-        verifySchema: bool = ...,
-    ) -> DataFrame:
-        ...
-
-    @overload
-    def createDataFrame(
-        self,
-        data: "RDD[AtomicValue]",
+        self,
+        data: Union[
+            "RDD[Union[DateTimeLiteral, LiteralType, DecimalLiteral]]",
+            Iterable[Union["DateTimeLiteral", "LiteralType", "DecimalLiteral"]],
+        ],
         schema: Union[AtomicType, str],
         verifySchema: bool = ...,
     ) -> DataFrame:
@@ -674,10 +656,10 @@ class SparkSession(SparkConversionMixin):
 
     @overload
     def createDataFrame(
-        self,
-        data: Iterable["AtomicValue"],
-        schema: Union[AtomicType, str],
-        verifySchema: bool = ...,
+        self,
+        data: Union["RDD[RowLike]", Iterable["RowLike"]],
+        schema: Union[StructType, str],
+        verifySchema: bool = ...,
     ) -> DataFrame:
         ...
 

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org