This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 824426e  [SPARK-36225][PYTHON][DOCS] Use DataFrame in python docstrings
824426e is described below

commit 824426e7da86a3effccef1b99f0bd7e5c776e496
Author: Dominik Gehl <d...@open.ch>
AuthorDate: Sat Jul 24 16:58:10 2021 +0900

    [SPARK-36225][PYTHON][DOCS] Use DataFrame in python docstrings

    ### What changes were proposed in this pull request?
    Changing references to Dataset in python docstrings to DataFrame

    ### Why are the changes needed?
    no Dataset class in pyspark

    ### Does this PR introduce _any_ user-facing change?
    No

    ### How was this patch tested?
    Doc change only

    Closes #33438 from dominikgehl/feature/SPARK-36225.

    Lead-authored-by: Dominik Gehl <d...@open.ch>
    Co-authored-by: Dominik Gehl <g...@fastmail.fm>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
    (cherry picked from commit ae1c20ee0dc24bd35cd15380e814f06e07314af2)
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 python/pyspark/ml/util.py       |  7 ++++---
 python/pyspark/sql/dataframe.py | 26 +++++++++++++-------------
 2 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/python/pyspark/ml/util.py b/python/pyspark/ml/util.py
index 21d2725..553b834 100644
--- a/python/pyspark/ml/util.py
+++ b/python/pyspark/ml/util.py
@@ -351,7 +351,8 @@ class DefaultParamsWritable(MLWritable):
     class stores all data as :py:class:`Param` values, then extending this trait
     will provide a default implementation of writing saved instances of the class.
     This only handles simple :py:class:`Param` types; e.g., it will not handle
-    :py:class:`Dataset`. See :py:class:`DefaultParamsReadable`, the counterpart to this trait.
+    :py:class:`pyspark.sql.DataFrame`. See :py:class:`DefaultParamsReadable`, the counterpart
+    to this class.
 
     .. versionadded:: 2.3.0
     """
@@ -460,8 +461,8 @@ class DefaultParamsReadable(MLReadable):
     If a :py:class:`Params` class stores all data as :py:class:`Param` values,
     then extending this trait will provide a default implementation of reading saved
     instances of the class. This only handles simple :py:class:`Param` types;
-    e.g., it will not handle :py:class:`Dataset`. See :py:class:`DefaultParamsWritable`,
-    the counterpart to this trait.
+    e.g., it will not handle :py:class:`pyspark.sql.DataFrame`. See
+    :py:class:`DefaultParamsWritable`, the counterpart to this class.
 
     .. versionadded:: 2.3.0
     """
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 3085092..4e45a6b 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -116,7 +116,7 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
         return RDD(rdd.toJavaRDD(), self._sc, UTF8Deserializer(use_unicode))
 
     def registerTempTable(self, name):
-        """Registers this DataFrame as a temporary table using the given name.
+        """Registers this :class:`DataFrame` as a temporary table using the given name.
 
         The lifetime of this temporary table is tied to the :class:`SparkSession`
         that was used to create this :class:`DataFrame`.
@@ -422,12 +422,12 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
 
     @property
     def isStreaming(self):
-        """Returns ``True`` if this :class:`Dataset` contains one or more sources that continuously
-        return data as it arrives. A :class:`Dataset` that reads data from a streaming source
-        must be executed as a :class:`StreamingQuery` using the :func:`start` method in
-        :class:`DataStreamWriter`.  Methods that return a single answer, (e.g., :func:`count` or
-        :func:`collect`) will throw an :class:`AnalysisException` when there is a streaming
-        source present.
+        """Returns ``True`` if this :class:`DataFrame` contains one or more sources that
+        continuously return data as it arrives. A :class:`DataFrame` that reads data from a
+        streaming source must be executed as a :class:`StreamingQuery` using the :func:`start`
+        method in :class:`DataStreamWriter`. Methods that return a single answer, (e.g.,
+        :func:`count` or :func:`collect`) will throw an :class:`AnalysisException` when there
+        is a streaming source present.
 
         .. versionadded:: 2.0.0
 
@@ -527,10 +527,10 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
         return None
 
     def checkpoint(self, eager=True):
-        """Returns a checkpointed version of this Dataset. Checkpointing can be used to truncate the
-        logical plan of this :class:`DataFrame`, which is especially useful in iterative algorithms
-        where the plan may grow exponentially. It will be saved to files inside the checkpoint
-        directory set with :meth:`SparkContext.setCheckpointDir`.
+        """Returns a checkpointed version of this :class:`DataFrame`. Checkpointing can be used to
+        truncate the logical plan of this :class:`DataFrame`, which is especially useful in
+        iterative algorithms where the plan may grow exponentially. It will be saved to files
+        inside the checkpoint directory set with :meth:`SparkContext.setCheckpointDir`.
 
         .. versionadded:: 2.1.0
 
@@ -547,8 +547,8 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin):
         return DataFrame(jdf, self.sql_ctx)
 
     def localCheckpoint(self, eager=True):
-        """Returns a locally checkpointed version of this Dataset. Checkpointing can be used to
-        truncate the logical plan of this :class:`DataFrame`, which is especially useful in
+        """Returns a locally checkpointed version of this :class:`DataFrame`. Checkpointing can be
+        used to truncate the logical plan of this :class:`DataFrame`, which is especially useful in
         iterative algorithms where the plan may grow exponentially. Local checkpoints are stored
         in the executors using the caching subsystem and therefore they are not reliable.

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
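For context, a minimal usage sketch (not part of the patch) exercising the DataFrame APIs whose docstrings this change retouches. The local session settings, the "numbers" table name, and the /tmp checkpoint path are illustrative assumptions, not anything the patch prescribes.

from pyspark.sql import SparkSession

# Build a throwaway local session; any PySpark 3.x environment should behave the same.
spark = SparkSession.builder.master("local[1]").appName("SPARK-36225-demo").getOrCreate()

# checkpoint() writes to the directory configured on the SparkContext.
spark.sparkContext.setCheckpointDir("/tmp/spark-checkpoints")

df = spark.range(10)  # a small batch DataFrame

# registerTempTable: registers the DataFrame under a name for SQL queries; its
# lifetime is tied to the SparkSession, as the updated docstring notes
# (deprecated in favor of createOrReplaceTempView, but still available in 3.1).
df.registerTempTable("numbers")
spark.sql("SELECT count(*) AS n FROM numbers").show()

# isStreaming: False here, since this DataFrame has no streaming source.
print(df.isStreaming)

# checkpoint: eagerly materializes the DataFrame into the checkpoint directory
# and returns a copy whose logical plan is truncated.
checkpointed = df.checkpoint(eager=True)

# localCheckpoint: also truncates the plan, but stores the data through the
# executors' caching subsystem, so it is faster yet not fault-tolerant.
local = df.localCheckpoint()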