This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 350a74fe8d5 [SPARK-40057][PYTHON][DOCS] Cleanup "<BLANKLINE>" in doctest 350a74fe8d5 is described below commit 350a74fe8d5d5f0f82dac4e3123e71d896bdf09a Author: Yikun Jiang <yikunk...@gmail.com> AuthorDate: Fri Aug 12 23:04:47 2022 +0900 [SPARK-40057][PYTHON][DOCS] Cleanup "<BLANKLINE>" in doctest ### What changes were proposed in this pull request? Clean up `<BLANKLINE>` in doctest ### Why are the changes needed? See https://github.com/apache/spark/pull/37465#discussion_r943071168, we'd better clean up all `<BLANKLINE>` in doctest to make the doctest code clearer. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? CI passed Closes #37492 from Yikun/blankline. Authored-by: Yikun Jiang <yikunk...@gmail.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- python/pyspark/ml/stat.py | 8 +++----- python/pyspark/mllib/tree.py | 9 +++------ python/pyspark/pandas/frame.py | 1 - python/pyspark/pandas/series.py | 38 +++++++++++++++++--------------------- python/pyspark/sql/dataframe.py | 1 - 5 files changed, 23 insertions(+), 34 deletions(-) diff --git a/python/pyspark/ml/stat.py b/python/pyspark/ml/stat.py index b91ef1b6cb3..704d2dc9baa 100644 --- a/python/pyspark/ml/stat.py +++ b/python/pyspark/ml/stat.py @@ -274,28 +274,24 @@ class Summarizer: +-----------------------------------+ |{[1.0,1.0,1.0], 1} | +-----------------------------------+ - <BLANKLINE> >>> df.select(summarizer.summary(df.features)).show(truncate=False) +--------------------------------+ |aggregate_metrics(features, 1.0)| +--------------------------------+ |{[1.0,1.5,2.0], 2} | +--------------------------------+ - <BLANKLINE> >>> df.select(Summarizer.mean(df.features, df.weight)).show(truncate=False) +--------------+ |mean(features)| +--------------+ |[1.0,1.0,1.0] | +--------------+ - <BLANKLINE> >>> df.select(Summarizer.mean(df.features)).show(truncate=False) +--------------+ 
|mean(features)| +--------------+ |[1.0,1.5,2.0] | +--------------+ - <BLANKLINE> """ @staticmethod @@ -519,7 +515,9 @@ if __name__ == "__main__": globs["sc"] = sc globs["spark"] = spark - failure_count, test_count = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS) + failure_count, test_count = doctest.testmod( + globs=globs, optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE + ) spark.stop() if failure_count: sys.exit(-1) diff --git a/python/pyspark/mllib/tree.py b/python/pyspark/mllib/tree.py index e1d87e99c8a..8a5c25d96a7 100644 --- a/python/pyspark/mllib/tree.py +++ b/python/pyspark/mllib/tree.py @@ -273,7 +273,6 @@ class DecisionTree: Predict: 0.0 Else (feature 0 > 0.5) Predict: 1.0 - <BLANKLINE> >>> model.predict(array([1.0])) 1.0 >>> model.predict(array([0.0])) @@ -511,10 +510,8 @@ class RandomForest: 7 >>> print(model) TreeEnsembleModel classifier with 3 trees - <BLANKLINE> >>> print(model.toDebugString()) TreeEnsembleModel classifier with 3 trees - <BLANKLINE> Tree 0: Predict: 1.0 Tree 1: @@ -527,7 +524,6 @@ class RandomForest: Predict: 0.0 Else (feature 0 > 1.5) Predict: 1.0 - <BLANKLINE> >>> model.predict([2.0]) 1.0 >>> model.predict([0.0]) @@ -764,7 +760,6 @@ class GradientBoostedTrees: 30 >>> print(model) # it already has newline TreeEnsembleModel classifier with 10 trees - <BLANKLINE> >>> model.predict([2.0]) 1.0 >>> model.predict([0.0]) @@ -881,7 +876,9 @@ def _test() -> None: spark = SparkSession.builder.master("local[4]").appName("mllib.tree tests").getOrCreate() globs["sc"] = spark.sparkContext - (failure_count, test_count) = doctest.testmod(globs=globs, optionflags=doctest.ELLIPSIS) + (failure_count, test_count) = doctest.testmod( + globs=globs, optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE + ) spark.stop() if failure_count: sys.exit(-1) diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py index 465541abdaa..b3ded9885fc 100644 --- a/python/pyspark/pandas/frame.py +++ 
b/python/pyspark/pandas/frame.py @@ -2217,7 +2217,6 @@ defaultdict(<class 'list'>, {'col..., 'col...})] Donatello & purple & bo staff \\ \bottomrule \end{tabular} - <BLANKLINE> """ args = locals() diff --git a/python/pyspark/pandas/series.py b/python/pyspark/pandas/series.py index ff4c7fcc8f1..62eaa3eb1ca 100644 --- a/python/pyspark/pandas/series.py +++ b/python/pyspark/pandas/series.py @@ -2775,11 +2775,10 @@ class Series(Frame, IndexOpsMixin, Generic[T]): Examples -------- >>> psser = ps.Series([2, 1, 3, 3], name='A') - >>> psser.unique().sort_values() # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS - <BLANKLINE> - ... 1 - ... 2 - ... 3 + >>> psser.unique().sort_values() + 1 1 + 0 2 + 2 3 Name: A, dtype: int64 >>> ps.Series([pd.Timestamp('2016-01-01') for _ in range(3)]).unique() @@ -2787,11 +2786,10 @@ class Series(Frame, IndexOpsMixin, Generic[T]): dtype: datetime64[ns] >>> psser.name = ('x', 'a') - >>> psser.unique().sort_values() # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS - <BLANKLINE> - ... 1 - ... 2 - ... 3 + >>> psser.unique().sort_values() + 1 1 + 0 2 + 2 3 Name: (x, a), dtype: int64 """ sdf = self._internal.spark_frame.select(self.spark.column).distinct() @@ -4718,21 +4716,19 @@ class Series(Frame, IndexOpsMixin, Generic[T]): 13 NaN dtype: float64 - >>> s.mode().sort_values() # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS - <BLANKLINE> - ... 1.0 - ... 2.0 - ... 3.0 + >>> s.mode().sort_values() + 0 1.0 + 1 2.0 + 2 3.0 dtype: float64 With 'dropna' set to 'False', we can also see NaN in the result - >>> s.mode(False).sort_values() # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS - <BLANKLINE> - ... 1.0 - ... 2.0 - ... 3.0 - ... 
NaN + >>> s.mode(False).sort_values() + 0 1.0 + 1 2.0 + 2 3.0 + 3 NaN dtype: float64 """ ser_count = self.value_counts(dropna=dropna, sort=False) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 8ab3ed35578..565d3304596 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -387,7 +387,6 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): root |-- age: integer (nullable = true) |-- name: string (nullable = true) - <BLANKLINE> """ print(self._jdf.schema().treeString()) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org