This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/master by this push:
     new 2635c38  [SPARK-34972][PYTHON] Make pandas-on-Spark doctests work
2635c38 is described below

commit 2635c3894ff935bf1cd2d86648a28dcb4dc3dc73
Author: Takuya UESHIN <ues...@databricks.com>
AuthorDate: Wed Apr 7 20:50:41 2021 +0900

    [SPARK-34972][PYTHON] Make pandas-on-Spark doctests work

    ### What changes were proposed in this pull request?

    Now that the Koalas main code has been merged into the PySpark code base (#32036),
    we should enable the doctests on Spark's test infrastructure.

    ### Why are the changes needed?

    Currently, the pandas-on-Spark modules are not tested at all.
    We should enable the doctests first; the remaining unit tests will be ported separately later.

    ### Does this PR introduce _any_ user-facing change?

    No.

    ### How was this patch tested?

    Enabled and ran the doctests for all the pandas-on-Spark modules.

    Closes #32069 from ueshin/issues/SPARK-34972/pyspark-pandas_doctests.

    Authored-by: Takuya UESHIN <ues...@databricks.com>
    Signed-off-by: HyukjinKwon <gurwls...@apache.org>
---
 .github/workflows/build_and_test.yml               |  2 +
 dev/run-tests.py                                   |  6 +--
 dev/sparktestsupport/modules.py                    | 44 ++++++++++++++++
 python/pyspark/pandas/__init__.py                  | 17 ++++--
 python/pyspark/pandas/accessors.py                 | 30 +++++++++++
 python/pyspark/pandas/base.py                      | 28 ++++++++++
 python/pyspark/pandas/categorical.py               | 30 +++++++++++
 python/pyspark/pandas/config.py                    | 28 ++++++++++
 python/pyspark/pandas/datetimes.py                 | 30 +++++++++++
 python/pyspark/pandas/exceptions.py                | 30 +++++++++++
 python/pyspark/pandas/extensions.py                | 32 ++++++++++++
 python/pyspark/pandas/frame.py                     | 53 +++++++++++++++++--
 python/pyspark/pandas/generic.py                   | 38 ++++++++++++++
 python/pyspark/pandas/groupby.py                   | 40 +++++++++++++--
 python/pyspark/pandas/indexes/base.py              | 30 +++++++++++
 python/pyspark/pandas/indexes/category.py          | 30 +++++++++++
 python/pyspark/pandas/indexes/datetimes.py         | 30 +++++++++++
 python/pyspark/pandas/indexes/multi.py             | 32 ++++++++++++
 python/pyspark/pandas/indexes/numeric.py           | 30 +++++++++++
 python/pyspark/pandas/indexing.py                  | 30 +++++++++++
 python/pyspark/pandas/internal.py                  | 30 +++++++++++
 python/pyspark/pandas/ml.py                        | 24 +++++++++
 python/pyspark/pandas/mlflow.py                    | 39 +++++++++++++-
 python/pyspark/pandas/namespace.py                 | 60 +++++++++++++++++++---
 python/pyspark/pandas/numpy_compat.py              | 30 +++++++++++
 python/pyspark/pandas/series.py                    | 30 ++++++++++-
 python/pyspark/pandas/spark/accessors.py           | 48 +++++++++++++++++
 python/pyspark/pandas/spark/utils.py               | 19 +++++++
 python/pyspark/pandas/{sql.py => sql_processor.py} | 30 +++++++++++
 python/pyspark/pandas/strings.py                   | 30 +++++++++++
 python/pyspark/pandas/typedef/typehints.py         | 19 +++++++
 python/pyspark/pandas/usage_logging/__init__.py    |  6 +--
 python/pyspark/pandas/utils.py                     | 28 ++++++++++
 python/pyspark/pandas/window.py                    | 28 ++++++++++
 34 files changed, 983 insertions(+), 28 deletions(-)

diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 9253d58..3abe206 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -161,6 +161,8 @@ jobs:
           pyspark-sql, pyspark-mllib, pyspark-resource
         - >-
           pyspark-core, pyspark-streaming, pyspark-ml
+        - >-
+          pyspark-pandas
     env:
       MODULES_TO_TEST: ${{ matrix.modules }}
       HADOOP_PROFILE: hadoop3.2
diff --git a/dev/run-tests.py b/dev/run-tests.py
index 83f9f02..c5b412d 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -123,17 +123,17 @@ def determine_modules_to_test(changed_modules, deduplicated=True):
     >>> [x.name for x in determine_modules_to_test([modules.sql])]
     ...  # doctest: +NORMALIZE_WHITESPACE
     ['sql', 'avro', 'hive', 'mllib', 'sql-kafka-0-10', 'examples', 'hive-thriftserver',
-     'pyspark-sql', 'repl', 'sparkr', 'pyspark-mllib', 'pyspark-ml']
+     'pyspark-sql', 'repl', 'sparkr', 'pyspark-mllib', 'pyspark-pandas', 'pyspark-ml']
     >>> sorted([x.name for x in determine_modules_to_test(
     ...     [modules.sparkr, modules.sql], deduplicated=False)])
     ...  # doctest: +NORMALIZE_WHITESPACE
     ['avro', 'examples', 'hive', 'hive-thriftserver', 'mllib', 'pyspark-ml',
-     'pyspark-mllib', 'pyspark-sql', 'repl', 'sparkr', 'sql', 'sql-kafka-0-10']
+     'pyspark-mllib', 'pyspark-pandas', 'pyspark-sql', 'repl', 'sparkr', 'sql', 'sql-kafka-0-10']
     >>> sorted([x.name for x in determine_modules_to_test(
     ...     [modules.sql, modules.core], deduplicated=False)])
     ...  # doctest: +NORMALIZE_WHITESPACE
     ['avro', 'catalyst', 'core', 'examples', 'graphx', 'hive', 'hive-thriftserver',
-     'mllib', 'mllib-local', 'pyspark-core', 'pyspark-ml', 'pyspark-mllib',
+     'mllib', 'mllib-local', 'pyspark-core', 'pyspark-ml', 'pyspark-mllib', 'pyspark-pandas',
      'pyspark-resource', 'pyspark-sql', 'pyspark-streaming', 'repl', 'root',
      'sparkr', 'sql', 'sql-kafka-0-10', 'streaming', 'streaming-kafka-0-10',
      'streaming-kinesis-asl']
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index 87bfbdf..6823415 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -573,6 +573,50 @@ pyspark_ml = Module(
     ]
 )

+pyspark_pandas = Module(
+    name="pyspark-pandas",
+    dependencies=[pyspark_core, pyspark_sql],
+    source_file_regexes=[
+        "python/pyspark/pandas/"
+    ],
+    python_test_goals=[
+        # doctests
+        "pyspark.pandas.accessors",
+        "pyspark.pandas.base",
+        "pyspark.pandas.categorical",
+        "pyspark.pandas.config",
+        "pyspark.pandas.datetimes",
+        "pyspark.pandas.exceptions",
+        "pyspark.pandas.extensions",
+        "pyspark.pandas.frame",
+        "pyspark.pandas.generic",
+        "pyspark.pandas.groupby",
+        "pyspark.pandas.indexing",
+        "pyspark.pandas.internal",
+        "pyspark.pandas.ml",
+        "pyspark.pandas.mlflow",
+        "pyspark.pandas.namespace",
+        "pyspark.pandas.numpy_compat",
+        "pyspark.pandas.series",
+        "pyspark.pandas.sql_processor",
+        "pyspark.pandas.strings",
+        "pyspark.pandas.utils",
+        "pyspark.pandas.window",
+        "pyspark.pandas.indexes.base",
+        "pyspark.pandas.indexes.category",
+        "pyspark.pandas.indexes.datetimes",
+        "pyspark.pandas.indexes.multi",
+        "pyspark.pandas.indexes.numeric",
+        "pyspark.pandas.spark.accessors",
+        "pyspark.pandas.spark.utils",
+        "pyspark.pandas.typedef.typehints",
+    ],
+    excluded_python_implementations=[
+        "PyPy"  # Skip these tests under PyPy since they require numpy, pandas, and pyarrow and
+                # they aren't available there
+    ]
+)
+
 sparkr = Module(
     name="sparkr",
     dependencies=[hive, mllib],
diff --git a/python/pyspark/pandas/__init__.py b/python/pyspark/pandas/__init__.py
index a605954..1e7363c 100644
--- a/python/pyspark/pandas/__init__.py
+++ b/python/pyspark/pandas/__init__.py
@@ -17,13 +17,24 @@
 import os
 import sys
 from distutils.version import LooseVersion
+import warnings
+
+from pyspark.sql.pandas.utils import require_minimum_pandas_version, require_minimum_pyarrow_version
+
+try:
+    require_minimum_pandas_version()
+    require_minimum_pyarrow_version()
+except ImportError as e:
+    if os.environ.get("SPARK_TESTING"):
+        warnings.warn(str(e))
+        sys.exit()
+    else:
+        raise

 from pyspark.pandas.version import __version__  # noqa: F401


 def assert_python_version():
-    import warnings
-
     major = 3
     minor = 5
     deprecated_version = (major, minor)
@@ -206,4 +217,4 @@ _auto_patch_pandas()
 # Import after the usage logger is attached.
 from pyspark.pandas.config import get_option, options, option_context, reset_option, set_option
 from pyspark.pandas.namespace import *  # F405
-from pyspark.pandas.sql import sql
+from pyspark.pandas.sql_processor import sql
diff --git a/python/pyspark/pandas/accessors.py b/python/pyspark/pandas/accessors.py
index 39a647c0..bcc3e76 100644
--- a/python/pyspark/pandas/accessors.py
+++ b/python/pyspark/pandas/accessors.py
@@ -928,3 +928,33 @@ class KoalasSeriesMethods(object):
             ),
             dtype=dtype,
         )
+
+
+def _test():
+    import os
+    import doctest
+    import sys
+    from pyspark.sql import SparkSession
+    import pyspark.pandas.accessors
+
+    os.chdir(os.environ["SPARK_HOME"])
+
+    globs = pyspark.pandas.accessors.__dict__.copy()
+    globs["pp"] = pyspark.pandas
+    spark = (
+        SparkSession.builder.master("local[4]")
+        .appName("pyspark.pandas.accessors tests")
+        .getOrCreate()
+    )
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.pandas.accessors,
+        globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
+    )
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/python/pyspark/pandas/base.py b/python/pyspark/pandas/base.py
index 6ae27a8..58a3a9c 100644
--- a/python/pyspark/pandas/base.py
+++ b/python/pyspark/pandas/base.py
@@ -1993,3 +1993,31 @@ class IndexOpsMixin(object, metaclass=ABCMeta):
             uniques = pd.Index(uniques_list)

         return codes, uniques
+
+
+def _test():
+    import os
+    import doctest
+    import sys
+    from pyspark.sql import SparkSession
+    import pyspark.pandas.base
+
+    os.chdir(os.environ["SPARK_HOME"])
+
+    globs = pyspark.pandas.base.__dict__.copy()
+    globs["pp"] = pyspark.pandas
+    spark = (
+        SparkSession.builder.master("local[4]").appName("pyspark.pandas.base tests").getOrCreate()
+    )
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.pandas.base,
+        globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
+    )
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/python/pyspark/pandas/categorical.py b/python/pyspark/pandas/categorical.py
index 87d56d0..c88e888 100644
--- a/python/pyspark/pandas/categorical.py
+++ b/python/pyspark/pandas/categorical.py
@@ -162,3 +162,33 @@ class CategoricalAccessor(object):
         self, new_categories, ordered: bool = None, rename: bool = False, inplace: bool = False
     ):
         raise NotImplementedError()
+
+
+def _test():
+    import os
+    import doctest
+    import sys
+    from pyspark.sql import SparkSession
+    import pyspark.pandas.categorical
+
+    os.chdir(os.environ["SPARK_HOME"])
+
+    globs = pyspark.pandas.categorical.__dict__.copy()
+    globs["pp"] = pyspark.pandas
+    spark = (
+        SparkSession.builder.master("local[4]")
+        .appName("pyspark.pandas.categorical tests")
+        .getOrCreate()
+    )
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.pandas.categorical,
+        globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
+    )
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/python/pyspark/pandas/config.py b/python/pyspark/pandas/config.py
index 7f011a2..4d0a3cd 100644
--- a/python/pyspark/pandas/config.py
+++ b/python/pyspark/pandas/config.py
@@ -440,3 +440,31 @@ class DictWrapper:


 options = DictWrapper(_options_dict)
+
+
+def _test():
+    import os
+    import doctest
+    import sys
+    from pyspark.sql import SparkSession
+    import pyspark.pandas.config
+
+    os.chdir(os.environ["SPARK_HOME"])
+
+    globs = pyspark.pandas.config.__dict__.copy()
+    globs["pp"] = pyspark.pandas
+    spark = (
+        SparkSession.builder.master("local[4]").appName("pyspark.pandas.config tests").getOrCreate()
+    )
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.pandas.config,
+        globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
+    )
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/python/pyspark/pandas/datetimes.py b/python/pyspark/pandas/datetimes.py
index a5d3f38..bc93094 100644
--- a/python/pyspark/pandas/datetimes.py
+++ b/python/pyspark/pandas/datetimes.py
@@ -848,3 +848,33 @@ class DatetimeMethods(object):
             return s.dt.day_name(locale=locale)

         return self._data.koalas.transform_batch(pandas_day_name)
+
+
+def _test():
+    import os
+    import doctest
+    import sys
+    from pyspark.sql import SparkSession
+    import pyspark.pandas.datetimes
+
+    os.chdir(os.environ["SPARK_HOME"])
+
+    globs = pyspark.pandas.datetimes.__dict__.copy()
+    globs["pp"] = pyspark.pandas
+    spark = (
+        SparkSession.builder.master("local[4]")
+        .appName("pyspark.pandas.datetimes tests")
+        .getOrCreate()
+    )
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.pandas.datetimes,
+        globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
+    )
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/python/pyspark/pandas/exceptions.py b/python/pyspark/pandas/exceptions.py
index 7ec874b..7be362a 100644
--- a/python/pyspark/pandas/exceptions.py
+++ b/python/pyspark/pandas/exceptions.py
@@ -104,3 +104,33 @@ class PandasNotImplementedError(NotImplementedError):
                 class_name, property_name, reason
             )
         super().__init__(msg)
+
+
+def _test():
+    import os
+    import doctest
+    import sys
+    from pyspark.sql import SparkSession
+    import pyspark.pandas.exceptions
+
+    os.chdir(os.environ["SPARK_HOME"])
+
+    globs = pyspark.pandas.exceptions.__dict__.copy()
+    globs["pp"] = pyspark.pandas
+    spark = (
+        SparkSession.builder.master("local[4]")
+        .appName("pyspark.pandas.exceptions tests")
+        .getOrCreate()
+    )
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.pandas.exceptions,
+        globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
+    )
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/python/pyspark/pandas/extensions.py b/python/pyspark/pandas/extensions.py
index 9b67f2a..2ce2738 100644
--- a/python/pyspark/pandas/extensions.py
+++ b/python/pyspark/pandas/extensions.py
@@ -340,3 +340,35 @@ def register_index_accessor(name):
     from pyspark.pandas import Index

     return _register_accessor(name, Index)
+
+
+def _test():
+    import os
+    import doctest
+    import sys
+    import numpy
+    from pyspark.sql import SparkSession
+    import pyspark.pandas.extensions
+
+    os.chdir(os.environ["SPARK_HOME"])
+
+    globs = pyspark.pandas.extensions.__dict__.copy()
+    globs["np"] = numpy
+    globs["pp"] = pyspark.pandas
+    spark = (
+        SparkSession.builder.master("local[4]")
+        .appName("pyspark.pandas.extensions tests")
+        .getOrCreate()
+    )
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.pandas.extensions,
+        globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
+    )
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py
index 4e7b9fa..96eb439 100644
--- a/python/pyspark/pandas/frame.py
+++ b/python/pyspark/pandas/frame.py
@@ -387,7 +387,7 @@ def _create_tuple_for_frame_type(params):
     return Tuple[tuple(new_params)]


-if (3, 5) <= sys.version_info < (3, 7):
+if (3, 5) <= sys.version_info < (3, 7) and __name__ != "__main__":
     from typing import GenericMeta  # type: ignore

     # This is a workaround to support variadic generic in DataFrame in Python 3.5+.
@@ -3506,7 +3506,7 @@ defaultdict(<class 'list'>, {'col..., 'col...})]

         Examples
         --------
-        >>> pp.range(1001).style  # doctest: +ELLIPSIS
+        >>> pp.range(1001).style  # doctest: +SKIP
         <pandas.io.formats.style.Styler object at ...>
         """
         max_results = get_option("compute.max_rows")
@@ -4694,16 +4694,17 @@ defaultdict(<class 'list'>, {'col..., 'col...})]

         Create a new Delta Lake table, partitioned by one column:

-        >>> df.to_delta('%s/to_delta/foo' % path, partition_cols='date')
+        >>> df.to_delta('%s/to_delta/foo' % path, partition_cols='date')  # doctest: +SKIP

         Partitioned by two columns:

-        >>> df.to_delta('%s/to_delta/bar' % path, partition_cols=['date', 'country'])
+        >>> df.to_delta('%s/to_delta/bar' % path,
+        ...             partition_cols=['date', 'country'])  # doctest: +SKIP

         Overwrite an existing table's partitions, using the 'replaceWhere' capability in Delta:

         >>> df.to_delta('%s/to_delta/bar' % path,
-        ...             mode='overwrite', replaceWhere='date >= "2012-01-01"')
+        ...             mode='overwrite', replaceWhere='date >= "2012-01-01"')  # doctest: +SKIP
         """
         if "options" in options and isinstance(options.get("options"), dict) and len(options) == 1:
             options = options.get("options")  # type: ignore
@@ -11974,3 +11975,45 @@ class CachedDataFrame(DataFrame):
         return self.spark.unpersist()

     unpersist.__doc__ = CachedSparkFrameMethods.unpersist.__doc__
+
+
+def _test():
+    import os
+    import doctest
+    import shutil
+    import sys
+    import tempfile
+    import uuid
+    from pyspark.sql import SparkSession
+    import pyspark.pandas.frame
+
+    os.chdir(os.environ["SPARK_HOME"])
+
+    globs = pyspark.pandas.frame.__dict__.copy()
+    globs["pp"] = pyspark.pandas
+    spark = (
+        SparkSession.builder.master("local[4]").appName("pyspark.pandas.frame tests").getOrCreate()
+    )
+
+    db_name = "db%s" % str(uuid.uuid4()).replace("-", "")
+    spark.sql("CREATE DATABASE %s" % db_name)
+    globs["db"] = db_name
+
+    path = tempfile.mkdtemp()
+    globs["path"] = path
+
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.pandas.frame,
+        globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
+    )
+
+    shutil.rmtree(path, ignore_errors=True)
+    spark.sql("DROP DATABASE IF EXISTS %s CASCADE" % db_name)
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/python/pyspark/pandas/generic.py b/python/pyspark/pandas/generic.py
index 46d6051..2d3e0ab 100644
--- a/python/pyspark/pandas/generic.py
+++ b/python/pyspark/pandas/generic.py
@@ -3100,3 +3100,41 @@ class Frame(object, metaclass=ABCMeta):
             return F.count(F.nanvl(spark_column, F.lit(None)))
         else:
             return F.count(spark_column)
+
+
+def _test():
+    import os
+    import doctest
+    import shutil
+    import sys
+    import tempfile
+    from pyspark.sql import SparkSession
+    import pyspark.pandas.generic
+
+    os.chdir(os.environ["SPARK_HOME"])
+
+    globs = pyspark.pandas.generic.__dict__.copy()
+    globs["pp"] = pyspark.pandas
+    spark = (
+        SparkSession.builder.master("local[4]")
+        .appName("pyspark.pandas.generic tests")
+        .getOrCreate()
+    )
+
+    path = tempfile.mkdtemp()
+    globs["path"] = path
+
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.pandas.generic,
+        globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
+    )
+
+    shutil.rmtree(path, ignore_errors=True)
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/python/pyspark/pandas/groupby.py b/python/pyspark/pandas/groupby.py
index c3fe2d8..90ad41c 100644
--- a/python/pyspark/pandas/groupby.py
+++ b/python/pyspark/pandas/groupby.py
@@ -1894,12 +1894,12 @@ class GroupBy(object, metaclass=ABCMeta):
         ...                    'b': [2, 3, 1, 4, 6, 9, 8, 10, 7, 5],
         ...                    'c': [3, 5, 2, 5, 1, 2, 6, 4, 3, 6]},
         ...                   columns=['a', 'b', 'c'],
-        ...                   index=[7, 2, 4, 1, 3, 4, 9, 10, 5, 6])
+        ...                   index=[7, 2, 3, 1, 3, 4, 9, 10, 5, 6])
         >>> df
            a   b  c
         7  1   2  3
         2  1   3  5
-        4  1   1  2
+        3  1   1  2
         1  1   4  5
         3  2   6  1
         4  2   9  2
@@ -1911,16 +1911,16 @@ class GroupBy(object, metaclass=ABCMeta):
         >>> df.groupby('a').tail(2).sort_index()
            a   b  c
         1  1   4  5
+        3  1   1  2
         4  2   9  2
-        4  1   1  2
         5  3   7  3
         6  3   5  6
         9  2   8  6

         >>> df.groupby('a')['b'].tail(2).sort_index()
         1     4
+        3     1
         4     9
-        4     1
         5     7
         6     5
         9     8
@@ -3184,3 +3184,35 @@ def normalize_keyword_aggregation(kwargs):
     if isinstance(order[0][0], tuple):
         order = [(*levs, method) for levs, method in order]
     return aggspec, columns, order
+
+
+def _test():
+    import os
+    import doctest
+    import sys
+    import numpy
+    from pyspark.sql import SparkSession
+    import pyspark.pandas.groupby
+
+    os.chdir(os.environ["SPARK_HOME"])
+
+    globs = pyspark.pandas.groupby.__dict__.copy()
+    globs["np"] = numpy
+    globs["pp"] = pyspark.pandas
+    spark = (
+        SparkSession.builder.master("local[4]")
+        .appName("pyspark.pandas.groupby tests")
+        .getOrCreate()
+    )
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.pandas.groupby,
+        globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
+    )
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/python/pyspark/pandas/indexes/base.py b/python/pyspark/pandas/indexes/base.py
index 1b224df..170fa6c 100644
--- a/python/pyspark/pandas/indexes/base.py
+++ b/python/pyspark/pandas/indexes/base.py
@@ -2469,3 +2469,33 @@ class Index(IndexOpsMixin):
             "The truth value of a {0} is ambiguous. "
             "Use a.empty, a.bool(), a.item(), a.any() or a.all().".format(self.__class__.__name__)
         )
+
+
+def _test():
+    import os
+    import doctest
+    import sys
+    from pyspark.sql import SparkSession
+    import pyspark.pandas.indexes.base
+
+    os.chdir(os.environ["SPARK_HOME"])
+
+    globs = pyspark.pandas.indexes.base.__dict__.copy()
+    globs["pp"] = pyspark.pandas
+    spark = (
+        SparkSession.builder.master("local[4]")
+        .appName("pyspark.pandas.indexes.base tests")
+        .getOrCreate()
+    )
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.pandas.indexes.base,
+        globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
+    )
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/python/pyspark/pandas/indexes/category.py b/python/pyspark/pandas/indexes/category.py
index 8bdec59..cb4e3bd 100644
--- a/python/pyspark/pandas/indexes/category.py
+++ b/python/pyspark/pandas/indexes/category.py
@@ -186,3 +186,33 @@ class CategoricalIndex(Index):
         else:
             return partial(property_or_func, self)
         raise AttributeError("'CategoricalIndex' object has no attribute '{}'".format(item))
+
+
+def _test():
+    import os
+    import doctest
+    import sys
+    from pyspark.sql import SparkSession
+    import pyspark.pandas.indexes.category
+
+    os.chdir(os.environ["SPARK_HOME"])
+
+    globs = pyspark.pandas.indexes.category.__dict__.copy()
+    globs["pp"] = pyspark.pandas
+    spark = (
+        SparkSession.builder.master("local[4]")
+        .appName("pyspark.pandas.indexes.category tests")
+        .getOrCreate()
+    )
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.pandas.indexes.category,
+        globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
+    )
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/python/pyspark/pandas/indexes/datetimes.py b/python/pyspark/pandas/indexes/datetimes.py
index 88ba7d2..b39196b 100644
--- a/python/pyspark/pandas/indexes/datetimes.py
+++ b/python/pyspark/pandas/indexes/datetimes.py
@@ -740,3 +740,33 @@ class DatetimeIndex(Index):
 def disallow_nanoseconds(freq):
     if freq in ["N", "ns"]:
         raise ValueError("nanoseconds is not supported")
+
+
+def _test():
+    import os
+    import doctest
+    import sys
+    from pyspark.sql import SparkSession
+    import pyspark.pandas.indexes.datetimes
+
+    os.chdir(os.environ["SPARK_HOME"])
+
+    globs = pyspark.pandas.indexes.datetimes.__dict__.copy()
+    globs["pp"] = pyspark.pandas
+    spark = (
+        SparkSession.builder.master("local[4]")
+        .appName("pyspark.pandas.indexes.datetimes tests")
+        .getOrCreate()
+    )
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.pandas.indexes.datetimes,
+        globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
+    )
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/python/pyspark/pandas/indexes/multi.py b/python/pyspark/pandas/indexes/multi.py
index ee8ea1e..623c83f 100644
--- a/python/pyspark/pandas/indexes/multi.py
+++ b/python/pyspark/pandas/indexes/multi.py
@@ -1168,3 +1168,35 @@ class MultiIndex(Index):

     def __iter__(self):
         return MissingPandasLikeMultiIndex.__iter__(self)
+
+
+def _test():
+    import os
+    import doctest
+    import sys
+    import numpy
+    from pyspark.sql import SparkSession
+    import pyspark.pandas.indexes.multi
+
+    os.chdir(os.environ["SPARK_HOME"])
+
+    globs = pyspark.pandas.indexes.multi.__dict__.copy()
+    globs["np"] = numpy
+    globs["pp"] = pyspark.pandas
+    spark = (
+        SparkSession.builder.master("local[4]")
+        .appName("pyspark.pandas.indexes.multi tests")
+        .getOrCreate()
+    )
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.pandas.indexes.multi,
+        globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
+    )
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/python/pyspark/pandas/indexes/numeric.py b/python/pyspark/pandas/indexes/numeric.py
index 1acad3f..f28bb61 100644
--- a/python/pyspark/pandas/indexes/numeric.py
+++ b/python/pyspark/pandas/indexes/numeric.py
@@ -145,3 +145,33 @@ class Float64Index(NumericIndex):
             return Index(data, dtype=dtype, copy=copy, name=name)

         return pp.from_pandas(pd.Float64Index(data=data, dtype=dtype, copy=copy, name=name))
+
+
+def _test():
+    import os
+    import doctest
+    import sys
+    from pyspark.sql import SparkSession
+    import pyspark.pandas.indexes.numeric
+
+    os.chdir(os.environ["SPARK_HOME"])
+
+    globs = pyspark.pandas.indexes.numeric.__dict__.copy()
+    globs["pp"] = pyspark.pandas
+    spark = (
+        SparkSession.builder.master("local[4]")
+        .appName("pyspark.pandas.indexes.numeric tests")
+        .getOrCreate()
+    )
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.pandas.indexes.numeric,
+        globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
+    )
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/python/pyspark/pandas/indexing.py b/python/pyspark/pandas/indexing.py
index 0016d46..326fce1 100644
--- a/python/pyspark/pandas/indexing.py
+++ b/python/pyspark/pandas/indexing.py
@@ -1706,3 +1706,33 @@ class iLocIndexer(LocIndexerLike):
         # Clean up implicitly cached properties to be able to reuse the indexer.
         del self._internal
         del self._sequence_col
+
+
+def _test():
+    import os
+    import doctest
+    import sys
+    from pyspark.sql import SparkSession
+    import pyspark.pandas.indexing
+
+    os.chdir(os.environ["SPARK_HOME"])
+
+    globs = pyspark.pandas.indexing.__dict__.copy()
+    globs["pp"] = pyspark.pandas
+    spark = (
+        SparkSession.builder.master("local[4]")
+        .appName("pyspark.pandas.indexing tests")
+        .getOrCreate()
+    )
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.pandas.indexing,
+        globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
+    )
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/python/pyspark/pandas/internal.py b/python/pyspark/pandas/internal.py
index 94abe97..c7fa438 100644
--- a/python/pyspark/pandas/internal.py
+++ b/python/pyspark/pandas/internal.py
@@ -1436,3 +1436,33 @@ class InternalFrame(object):
             reset_index[name] = col.replace({np.nan: None})

         return reset_index, index_columns, index_dtypes, data_columns, data_dtypes
+
+
+def _test():
+    import os
+    import doctest
+    import sys
+    from pyspark.sql import SparkSession
+    import pyspark.pandas.internal
+
+    os.chdir(os.environ["SPARK_HOME"])
+
+    globs = pyspark.pandas.internal.__dict__.copy()
+    globs["pp"] = pyspark.pandas
+    spark = (
+        SparkSession.builder.master("local[4]")
+        .appName("pyspark.pandas.internal tests")
+        .getOrCreate()
+    )
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.pandas.internal,
+        globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
+    )
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/python/pyspark/pandas/ml.py b/python/pyspark/pandas/ml.py
index 6ea2cf3..912644f 100644
--- a/python/pyspark/pandas/ml.py
+++ b/python/pyspark/pandas/ml.py
@@ -89,3 +89,27 @@ def to_numeric_df(kdf: "pp.DataFrame") -> Tuple[pyspark.sql.DataFrame, List[Tupl
     va = VectorAssembler(inputCols=numeric_df.columns, outputCol=CORRELATION_OUTPUT_COLUMN)
     v = va.transform(numeric_df).select(CORRELATION_OUTPUT_COLUMN)
     return v, numeric_column_labels
+
+
+def _test():
+    import os
+    import doctest
+    import sys
+    from pyspark.sql import SparkSession
+    import pyspark.pandas.ml
+
+    os.chdir(os.environ["SPARK_HOME"])
+
+    globs = pyspark.pandas.ml.__dict__.copy()
+    globs["pp"] = pyspark.pandas
+    spark = SparkSession.builder.master("local[4]").appName("pyspark.pandas.ml tests").getOrCreate()
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.pandas.ml, globs=globs, optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE
+    )
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/python/pyspark/pandas/mlflow.py b/python/pyspark/pandas/mlflow.py
index 15ff066..e073b1d 100644
--- a/python/pyspark/pandas/mlflow.py
+++ b/python/pyspark/pandas/mlflow.py
@@ -18,7 +18,6 @@
 """
 MLflow-related functions to load models and apply them to Koalas dataframes.
 """
-from mlflow import pyfunc
 from pyspark.sql.types import DataType
 import pandas as pd
 import numpy as np
@@ -62,10 +61,14 @@ class PythonModelWrapper(object):
         """
         The return object has to follow the API of mlflow.pyfunc.PythonModel.
         """
+        from mlflow import pyfunc
+
         return pyfunc.load_model(model_uri=self._model_uri)

     @lazy_property
     def _model_udf(self):
+        from mlflow import pyfunc
+
         spark = default_session()
         return pyfunc.spark_udf(spark, model_uri=self._model_uri, result_type=self._return_type)

@@ -190,3 +193,37 @@ def load_model(model_uri, predict_type="infer") -> PythonModelWrapper:
        0  2.0  3.0 -1  1.376932
     """
     return PythonModelWrapper(model_uri, predict_type)
+
+
+def _test():
+    import os
+    import doctest
+    import sys
+    from pyspark.sql import SparkSession
+    import pyspark.pandas.mlflow
+
+    os.chdir(os.environ["SPARK_HOME"])
+
+    globs = pyspark.pandas.mlflow.__dict__.copy()
+    globs["pp"] = pyspark.pandas
+    spark = (
+        SparkSession.builder.master("local[4]").appName("pyspark.pandas.mlflow tests").getOrCreate()
+    )
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.pandas.mlflow,
+        globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
+    )
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    try:
+        import mlflow  # noqa: F401
+        import sklearn  # noqa: F401
+
+        _test()
+    except ImportError:
+        pass
diff --git a/python/pyspark/pandas/namespace.py b/python/pyspark/pandas/namespace.py
index 3a14d6a..1a4ba49 100644
--- a/python/pyspark/pandas/namespace.py
+++ b/python/pyspark/pandas/namespace.py
@@ -512,13 +512,14 @@ def read_delta(

     Examples
     --------
-    >>> pp.range(1).to_delta('%s/read_delta/foo' % path)
-    >>> pp.read_delta('%s/read_delta/foo' % path)
+    >>> pp.range(1).to_delta('%s/read_delta/foo' % path)  # doctest: +SKIP
+    >>> pp.read_delta('%s/read_delta/foo' % path)  # doctest: +SKIP
        id
     0   0

-    >>> pp.range(10, 15, num_partitions=1).to_delta('%s/read_delta/foo' % path, mode='overwrite')
-    >>> pp.read_delta('%s/read_delta/foo' % path)
+    >>> pp.range(10, 15, num_partitions=1).to_delta('%s/read_delta/foo' % path,
+    ...                                             mode='overwrite')  # doctest: +SKIP
+    >>> pp.read_delta('%s/read_delta/foo' % path)  # doctest: +SKIP
        id
     0  10
     1  11
@@ -526,16 +527,15 @@ def read_delta(
     3  13
     4  14

-    >>> pp.read_delta('%s/read_delta/foo' % path, version=0)
+    >>> pp.read_delta('%s/read_delta/foo' % path, version=0)  # doctest: +SKIP
        id
     0   0

     You can preserve the index in the roundtrip as below.

     >>> pp.range(10, 15, num_partitions=1).to_delta(
-    ...     '%s/read_delta/bar' % path, index_col="index")
-    >>> pp.read_delta('%s/read_delta/bar' % path, index_col="index")
-    ... # doctest: +NORMALIZE_WHITESPACE
+    ...     '%s/read_delta/bar' % path, index_col="index")  # doctest: +SKIP
+    >>> pp.read_delta('%s/read_delta/bar' % path, index_col="index")  # doctest: +SKIP
        id
     index
     0      10
@@ -2875,3 +2875,47 @@ _get_dummies_acceptable_types = _get_dummies_default_accept_types + (
     BooleanType,
     TimestampType,
 )
+
+
+def _test():
+    import os
+    import doctest
+    import shutil
+    import sys
+    import tempfile
+    import uuid
+    from pyspark.sql import SparkSession
+    import pyspark.pandas.namespace
+
+    os.chdir(os.environ["SPARK_HOME"])
+
+    globs = pyspark.pandas.namespace.__dict__.copy()
+    globs["pp"] = pyspark.pandas
+    spark = (
+        SparkSession.builder.master("local[4]")
+        .appName("pyspark.pandas.namespace tests")
+        .getOrCreate()
+    )
+
+    db_name = "db%s" % str(uuid.uuid4()).replace("-", "")
+    spark.sql("CREATE DATABASE %s" % db_name)
+    globs["db"] = db_name
+
+    path = tempfile.mkdtemp()
+    globs["path"] = path
+
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.pandas.namespace,
+        globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
+    )
+
+    shutil.rmtree(path, ignore_errors=True)
+    spark.sql("DROP DATABASE IF EXISTS %s CASCADE" % db_name)
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/python/pyspark/pandas/numpy_compat.py b/python/pyspark/pandas/numpy_compat.py
index 3ab4073..8d04896 100644
--- a/python/pyspark/pandas/numpy_compat.py
+++ b/python/pyspark/pandas/numpy_compat.py
@@ -208,3 +208,33 @@ def maybe_dispatch_ufunc_to_spark_func(
         return column_op(convert_arguments)(*inputs)  # type: ignore
     else:
         return NotImplemented
+
+
+def _test():
+    import os
+    import doctest
+    import sys
+    from pyspark.sql import SparkSession
+    import pyspark.pandas.numpy_compat
+
+    os.chdir(os.environ["SPARK_HOME"])
+
+    globs = pyspark.pandas.numpy_compat.__dict__.copy()
+    globs["pp"] = pyspark.pandas
+    spark = (
+        SparkSession.builder.master("local[4]")
+        .appName("pyspark.pandas.numpy_compat tests")
+        .getOrCreate()
+    )
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.pandas.numpy_compat,
+        globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
+    )
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/python/pyspark/pandas/series.py b/python/pyspark/pandas/series.py
index 4f513a9..44de907 100644
--- a/python/pyspark/pandas/series.py
+++ b/python/pyspark/pandas/series.py
@@ -336,7 +336,7 @@ def _create_type_for_series_type(param):
     return SeriesType[new_class]


-if (3, 5) <= sys.version_info < (3, 7):
+if (3, 5) <= sys.version_info < (3, 7) and __name__ != "__main__":
     from typing import GenericMeta  # type: ignore

     old_getitem = GenericMeta.__getitem__  # type: ignore
@@ -6233,3 +6233,31 @@ def first_series(df) -> Union["Series", pd.Series]:
         return df._kser_for(df._internal.column_labels[0])
     else:
         return df[df.columns[0]]
+
+
+def _test():
+    import os
+    import doctest
+    import sys
+    from pyspark.sql import SparkSession
+    import pyspark.pandas.series
+
+    os.chdir(os.environ["SPARK_HOME"])
+
+    globs = pyspark.pandas.series.__dict__.copy()
+    globs["pp"] = pyspark.pandas
+    spark = (
+        SparkSession.builder.master("local[4]").appName("pyspark.pandas.series tests").getOrCreate()
+    )
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.pandas.series,
+        globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
+    )
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/python/pyspark/pandas/spark/accessors.py b/python/pyspark/pandas/spark/accessors.py
index 543e3f4..8b835d3 100644
--- a/python/pyspark/pandas/spark/accessors.py
+++ b/python/pyspark/pandas/spark/accessors.py
@@ -1247,3 +1247,51 @@ class CachedSparkFrameMethods(SparkFrameMethods):
         """
         if self._kdf._cached.is_cached:
             self._kdf._cached.unpersist()
+
+
+def _test():
+    import os
+    import doctest
+    import shutil
+    import sys
+    import tempfile
+    import uuid
+    import numpy
+    import pandas
+    from pyspark.sql import SparkSession
+    import pyspark.pandas.spark.accessors
+
+    os.chdir(os.environ["SPARK_HOME"])
+
+    globs = pyspark.pandas.spark.accessors.__dict__.copy()
+    globs["np"] = numpy
+    globs["pd"] = pandas
+    globs["pp"] = pyspark.pandas
+    spark = (
+        SparkSession.builder.master("local[4]")
+        .appName("pyspark.pandas.spark.accessors tests")
+        .getOrCreate()
+    )
+
+    db_name = "db%s" % str(uuid.uuid4()).replace("-", "")
+    spark.sql("CREATE DATABASE %s" % db_name)
+    globs["db"] = db_name
+
+    path = tempfile.mkdtemp()
+    globs["path"] = path
+
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.pandas.spark.accessors,
+        globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
+    )
+
+    shutil.rmtree(path, ignore_errors=True)
+    spark.sql("DROP DATABASE IF EXISTS %s CASCADE" % db_name)
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/python/pyspark/pandas/spark/utils.py b/python/pyspark/pandas/spark/utils.py
index 46505a9..09aed45 100644
--- a/python/pyspark/pandas/spark/utils.py
+++ b/python/pyspark/pandas/spark/utils.py
@@ -122,3 +122,22 @@ StructField(B,DecimalType(30,15),false)))
         return DecimalType(precision=precision, scale=scale)
     else:
         return dt
+
+
+def _test():
+    import doctest
+    import sys
+    import pyspark.pandas.spark.utils
+
+    globs = pyspark.pandas.spark.utils.__dict__.copy()
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.pandas.spark.utils,
+        globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
+    )
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/python/pyspark/pandas/sql.py b/python/pyspark/pandas/sql_processor.py
similarity index 93%
rename from python/pyspark/pandas/sql.py
rename to python/pyspark/pandas/sql_processor.py
index aec2ae9..f26e82f 100644
--- a/python/pyspark/pandas/sql.py
+++ b/python/pyspark/pandas/sql_processor.py
@@ -300,3 +300,33 @@ class SQLProcessor(object):
         if isinstance(var, (tuple, range)):
             return self._convert_var(list(var))
         raise ValueError("Unsupported variable type {}: {}".format(type(var).__name__, str(var)))
+
+
+def _test():
+    import os
+    import doctest
+    import sys
+    from pyspark.sql import SparkSession
+    import pyspark.pandas.sql_processor
+
+    os.chdir(os.environ["SPARK_HOME"])
+
+    globs = pyspark.pandas.sql_processor.__dict__.copy()
+    globs["pp"] = pyspark.pandas
+    spark = (
+        SparkSession.builder.master("local[4]")
+        .appName("pyspark.pandas.sql_processor tests")
+        .getOrCreate()
+    )
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.pandas.sql_processor,
+        globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
+    )
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/python/pyspark/pandas/strings.py b/python/pyspark/pandas/strings.py
index 3945184..a57f372 100644
--- a/python/pyspark/pandas/strings.py
+++ b/python/pyspark/pandas/strings.py
@@ -2287,3 +2287,33 @@ class StringMethods(object):
         Not supported.
         """
         raise NotImplementedError()
+
+
+def _test():
+    import os
+    import doctest
+    import sys
+    from pyspark.sql import SparkSession
+    import pyspark.pandas.strings
+
+    os.chdir(os.environ["SPARK_HOME"])
+
+    globs = pyspark.pandas.strings.__dict__.copy()
+    globs["pp"] = pyspark.pandas
+    spark = (
+        SparkSession.builder.master("local[4]")
+        .appName("pyspark.pandas.strings tests")
+        .getOrCreate()
+    )
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.pandas.strings,
+        globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
+    )
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/python/pyspark/pandas/typedef/typehints.py b/python/pyspark/pandas/typedef/typehints.py
index 657c9a7..5d461f6 100644
--- a/python/pyspark/pandas/typedef/typehints.py
+++ b/python/pyspark/pandas/typedef/typehints.py
@@ -519,3 +519,22 @@ def infer_return_type(f) -> Union[SeriesType, DataFrameType, ScalarType, Unknown
         return UnknownType(tpe)
     else:
         return ScalarType(*types)
+
+
+def _test():
+    import doctest
+    import sys
+    import pyspark.pandas.typedef.typehints
+
+    globs = pyspark.pandas.typedef.typehints.__dict__.copy()
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.pandas.typedef.typehints,
+        globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
+    )
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/python/pyspark/pandas/usage_logging/__init__.py b/python/pyspark/pandas/usage_logging/__init__.py
index cd04d99..300f31a 100644
--- a/python/pyspark/pandas/usage_logging/__init__.py
+++ b/python/pyspark/pandas/usage_logging/__init__.py
@@ -25,7 +25,7 @@ from typing import Union

 import pandas as pd

-from pyspark.pandas import config, namespace, sql
+from pyspark.pandas import config, namespace, sql_processor
 from pyspark.pandas.accessors import KoalasFrameMethods
 from pyspark.pandas.frame import DataFrame
 from pyspark.pandas.datetimes import DatetimeMethods
@@ -114,8 +114,8 @@ def attach(logger_module: Union[str, ModuleType]) -> None:
         except ImportError:
             pass

-    sql._CAPTURE_SCOPES = 3  # type: ignore
-    modules.append(sql)  # type: ignore
+    sql_processor._CAPTURE_SCOPES = 3  # type: ignore
+    modules.append(sql_processor)  # type: ignore

     # Modules
     for target_module in modules:
diff --git a/python/pyspark/pandas/utils.py b/python/pyspark/pandas/utils.py
index 002bac94..dfbe766 100644
--- a/python/pyspark/pandas/utils.py
+++ b/python/pyspark/pandas/utils.py
@@ -876,3 +876,31 @@ def compare_disallow_null(left, right, comp):

 def compare_allow_null(left, right, comp):
     return left.isNull() | right.isNull() | comp(left, right)
+
+
+def _test():
+    import os
+    import doctest
+    import sys
+    from pyspark.sql import SparkSession
+    import pyspark.pandas.utils
+
+    os.chdir(os.environ["SPARK_HOME"])
+
+    globs = pyspark.pandas.utils.__dict__.copy()
+    globs["pp"] = pyspark.pandas
+    spark = (
+        SparkSession.builder.master("local[4]").appName("pyspark.pandas.utils tests").getOrCreate()
+    )
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.pandas.utils,
+        globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
+    )
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
diff --git a/python/pyspark/pandas/window.py b/python/pyspark/pandas/window.py
index c42f074..a4ba4ac 100644
--- a/python/pyspark/pandas/window.py
+++ b/python/pyspark/pandas/window.py
@@ -1737,3 +1737,31 @@ class ExpandingGroupby(Expanding):
         numpy.var : Equivalent method for Numpy array.
         """
         return super().var()
+
+
+def _test():
+    import os
+    import doctest
+    import sys
+    from pyspark.sql import SparkSession
+    import pyspark.pandas.window
+
+    os.chdir(os.environ["SPARK_HOME"])
+
+    globs = pyspark.pandas.window.__dict__.copy()
+    globs["pp"] = pyspark.pandas
+    spark = (
+        SparkSession.builder.master("local[4]").appName("pyspark.pandas.window tests").getOrCreate()
+    )
+    (failure_count, test_count) = doctest.testmod(
+        pyspark.pandas.window,
+        globs=globs,
+        optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE,
+    )
+    spark.stop()
+    if failure_count:
+        sys.exit(-1)
+
+
+if __name__ == "__main__":
+    _test()
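
A note on the doctest option flags that every _test() above passes to doctest.testmod: ELLIPSIS lets "..." in expected output match arbitrary text, NORMALIZE_WHITESPACE collapses whitespace differences, and the per-example "# doctest: +SKIP" directives added for the Delta Lake and Styler examples skip those examples entirely, since their dependencies are not always installed. A minimal, Spark-free sketch of the same setup ("some_missing_dependency" is a made-up placeholder, not a real package):

    import doctest


    def sample():
        """
        >>> list(range(5))
        [0, 1,  2,  3, 4]
        >>> object()  # matches thanks to ELLIPSIS
        <object object at 0x...>
        >>> import some_missing_dependency  # doctest: +SKIP
        """


    if __name__ == "__main__":
        # Same flags as the _test() helpers in the patch above.
        failures, _ = doctest.testmod(
            optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE
        )
        raise SystemExit(1 if failures else 0)

The first example passes only because NORMALIZE_WHITESPACE is set, and the second only because ELLIPSIS is set; the third never runs at all.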
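With the new module registered in dev/sparktestsupport/modules.py, the standard runner should pick these goals up; from a checkout with pandas and pyarrow installed, something like ./python/run-tests --modules=pyspark-pandas is expected to work (the module is excluded under PyPy). Each file should also be directly executable, e.g. SPARK_HOME=$(pwd) python python/pyspark/pandas/config.py, since its _test() chdirs into SPARK_HOME, runs the module's doctests against a local[4] session, and exits non-zero on any failure.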
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org