This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new 7c51618cc26 [SPARK-43881][SQL][PYTHON][CONNECT] Add optional pattern for Catalog.listDatabases
7c51618cc26 is described below

commit 7c51618cc2627fee8e6c2983319dc5ab6060d33f
Author: Jiaan Geng <belie...@163.com>
AuthorDate: Mon Jun 5 08:32:28 2023 +0900

    [SPARK-43881][SQL][PYTHON][CONNECT] Add optional pattern for Catalog.listDatabases

    ### What changes were proposed in this pull request?
    Currently, the SQL syntax `SHOW [NAMESPACES | DATABASES | SCHEMAS] LIKE pattern` supports an
    optional pattern that filters the returned databases, but `Catalog.listDatabases` lacks this
    capability in both the Catalog API and the Connect Catalog API. This PR adds an optional
    pattern to `Catalog.listDatabases`.

    ### Why are the changes needed?
    The optional pattern is very useful for filtering databases by name.

    ### Does this PR introduce _any_ user-facing change?
    'No'.
    New feature.

    ### How was this patch tested?
    New test cases.

    Closes #41421 from beliefer/SPARK-43881.

    Authored-by: Jiaan Geng <belie...@163.com>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 .../org/apache/spark/sql/catalog/Catalog.scala     |   8 ++
 .../apache/spark/sql/internal/CatalogImpl.scala    |  12 +++
 .../scala/org/apache/spark/sql/CatalogSuite.scala  |   5 +
 .../src/main/protobuf/spark/connect/catalog.proto  |   5 +-
 .../sql/connect/planner/SparkConnectPlanner.scala  |   6 +-
 project/MimaExcludes.scala                         |   4 +-
 python/pyspark/sql/catalog.py                      |  21 ++++-
 python/pyspark/sql/connect/catalog.py              |   4 +-
 python/pyspark/sql/connect/plan.py                 |   8 +-
 python/pyspark/sql/connect/proto/catalog_pb2.py    | 104 ++++++++++-----------
 python/pyspark/sql/connect/proto/catalog_pb2.pyi   |  14 +++
 python/pyspark/sql/tests/test_catalog.py           |   4 +
 .../org/apache/spark/sql/catalog/Catalog.scala     |   8 ++
 .../apache/spark/sql/internal/CatalogImpl.scala    |  18 ++++
 .../apache/spark/sql/internal/CatalogSuite.scala   |   4 +
 15 files changed, 164 insertions(+), 61 deletions(-)

diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala
index 62167b242de..363f895db20 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala
@@ -51,6 +51,14 @@ abstract class Catalog {
    */
   def listDatabases(): Dataset[Database]
 
+  /**
+   * Returns a list of databases (namespaces) whose name matches the specified pattern and
+   * that are available within the current catalog.
+   *
+   * @since 3.5.0
+   */
+  def listDatabases(pattern: String): Dataset[Database]
+
   /**
    * Returns a list of tables/views in the current database (namespace). This includes all
    * temporary views.
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
index f72a99f6675..c2ed7f4e19e 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
@@ -66,6 +66,18 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
     }
   }
 
+  /**
+   * Returns a list of databases (namespaces) whose name matches the specified pattern and
+   * that are available within the current catalog.
+   *
+   * @since 3.5.0
+   */
+  override def listDatabases(pattern: String): Dataset[Database] = {
+    sparkSession.newDataset(CatalogImpl.databaseEncoder) { builder =>
+      builder.getCatalogBuilder.getListDatabasesBuilder.setPattern(pattern)
+    }
+  }
+
   /**
    * Returns a list of tables/views in the current database (namespace). This includes all
    * temporary views.
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/CatalogSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/CatalogSuite.scala
index 49741842377..396f7214c04 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/CatalogSuite.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/CatalogSuite.scala
@@ -39,6 +39,11 @@ class CatalogSuite extends RemoteSparkSession with SQLHelper {
     assert(dbs.length == 2)
     assert(dbs.map(_.name) sameElements Array(db, currentDb))
     assert(dbs.map(_.catalog).distinct sameElements Array("spark_catalog"))
+    var databasesWithPattern = spark.catalog.listDatabases("def*").collect().sortBy(_.name)
+    assert(databasesWithPattern.length == 1)
+    assert(databasesWithPattern.map(_.name) sameElements Array(currentDb))
+    databasesWithPattern = spark.catalog.listDatabases("def2*").collect().sortBy(_.name)
+    assert(databasesWithPattern.length == 0)
     val database = spark.catalog.getDatabase(db)
     assert(database.name == db)
     val message = intercept[StatusRuntimeException] {
diff --git a/connector/connect/common/src/main/protobuf/spark/connect/catalog.proto b/connector/connect/common/src/main/protobuf/spark/connect/catalog.proto
index f048dbc7f25..57d75ee4a42 100644
--- a/connector/connect/common/src/main/protobuf/spark/connect/catalog.proto
+++ b/connector/connect/common/src/main/protobuf/spark/connect/catalog.proto
@@ -68,7 +68,10 @@ message SetCurrentDatabase {
 }
 
 // See `spark.catalog.listDatabases`
-message ListDatabases { }
+message ListDatabases {
+  // (Optional) The pattern that the database name needs to match
+  optional string pattern = 1;
+}
 
 // See `spark.catalog.listTables`
 message ListTables {
diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
index 631a9eee5f2..f09a4a4895b 100644
--- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
+++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
@@ -2697,7 +2697,11 @@ class SparkConnectPlanner(val session: SparkSession) {
   }
 
   private def transformListDatabases(getListDatabases: proto.ListDatabases): LogicalPlan = {
-    session.catalog.listDatabases().logicalPlan
+    if (getListDatabases.hasPattern) {
+      session.catalog.listDatabases(getListDatabases.getPattern).logicalPlan
+    } else {
+      session.catalog.listDatabases().logicalPlan
+    }
   }
 
   private def transformListTables(getListTables: proto.ListTables): LogicalPlan = {
diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
index a902cddf7da..e0b0604d1c0 100644
--- a/project/MimaExcludes.scala
+++ b/project/MimaExcludes.scala
@@ -44,7 +44,9 @@ object MimaExcludes {
     ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.util.HadoopFSUtils$SerializableFileStatus"),
     ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.util.HadoopFSUtils$SerializableFileStatus$"),
     // [SPARK-43792][SQL][PYTHON][CONNECT] Add optional pattern for Catalog.listCatalogs
-    ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.catalog.Catalog.listCatalogs")
+    ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.catalog.Catalog.listCatalogs"),
+    // [SPARK-43881][SQL][PYTHON][CONNECT] Add optional pattern for Catalog.listDatabases
+    ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.catalog.Catalog.listDatabases")
   )
 
   // Default exclude rules
diff --git a/python/pyspark/sql/catalog.py b/python/pyspark/sql/catalog.py
index 83d5fbbeae7..c0df6f38dbf 100644
--- a/python/pyspark/sql/catalog.py
+++ b/python/pyspark/sql/catalog.py
@@ -190,12 +190,20 @@ class Catalog:
         """
         return self._jcatalog.setCurrentDatabase(dbName)
 
-    def listDatabases(self) -> List[Database]:
+    def listDatabases(self, pattern: Optional[str] = None) -> List[Database]:
         """
         Returns a list of databases available across all sessions.
 
         .. versionadded:: 2.0.0
 
+        Parameters
+        ----------
+        pattern : str
+            The pattern that the database name needs to match.
+
+            .. versionchanged:: 3.5.0
+                Added ``pattern`` argument.
+
         Returns
         -------
         list
@@ -205,8 +213,17 @@ class Catalog:
         --------
         >>> spark.catalog.listDatabases()
         [Database(name='default', catalog='spark_catalog', description='default database', ...
+
+        >>> spark.catalog.listDatabases("def*")
+        [Database(name='default', catalog='spark_catalog', description='default database', ...
+
+        >>> spark.catalog.listDatabases("def2*")
+        []
         """
-        iter = self._jcatalog.listDatabases().toLocalIterator()
+        if pattern is None:
+            iter = self._jcatalog.listDatabases().toLocalIterator()
+        else:
+            iter = self._jcatalog.listDatabases(pattern).toLocalIterator()
         databases = []
         while iter.hasNext():
             jdb = iter.next()
diff --git a/python/pyspark/sql/connect/catalog.py b/python/pyspark/sql/connect/catalog.py
index cdcd96890b3..790b194c3f8 100644
--- a/python/pyspark/sql/connect/catalog.py
+++ b/python/pyspark/sql/connect/catalog.py
@@ -82,8 +82,8 @@ class Catalog:
 
     setCurrentDatabase.__doc__ = PySparkCatalog.setCurrentDatabase.__doc__
 
-    def listDatabases(self) -> List[Database]:
-        pdf = self._execute_and_fetch(plan.ListDatabases())
+    def listDatabases(self, pattern: Optional[str] = None) -> List[Database]:
+        pdf = self._execute_and_fetch(plan.ListDatabases(pattern=pattern))
         return [
             Database(
                 name=row.iloc[0],
diff --git a/python/pyspark/sql/connect/plan.py b/python/pyspark/sql/connect/plan.py
index 2793ecb3272..79c070101b6 100644
--- a/python/pyspark/sql/connect/plan.py
+++ b/python/pyspark/sql/connect/plan.py
@@ -1636,11 +1636,15 @@ class SetCurrentDatabase(LogicalPlan):
 
 
 class ListDatabases(LogicalPlan):
-    def __init__(self) -> None:
+    def __init__(self, pattern: Optional[str] = None) -> None:
         super().__init__(None)
+        self._pattern = pattern
 
     def plan(self, session: "SparkConnectClient") -> proto.Relation:
-        return proto.Relation(catalog=proto.Catalog(list_databases=proto.ListDatabases()))
+        plan = proto.Relation(catalog=proto.Catalog(list_databases=proto.ListDatabases()))
+        if self._pattern is not None:
+            plan.catalog.list_databases.pattern = self._pattern
+        return plan
 
 
 class ListTables(LogicalPlan):
diff --git a/python/pyspark/sql/connect/proto/catalog_pb2.py b/python/pyspark/sql/connect/proto/catalog_pb2.py
index 76c29fedc0c..f82b360a4a7 100644
--- a/python/pyspark/sql/connect/proto/catalog_pb2.py
+++ b/python/pyspark/sql/connect/proto/catalog_pb2.py
@@ -34,7 +34,7 @@ from pyspark.sql.connect.proto import types_pb2 as spark_dot_connect_dot_types__
 
 
 DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
-    b'\n\x1bspark/connect/catalog.proto\x12\rspark.connect\x1a\x1aspark/connect/common.proto\x1a\x19spark/connect/types.proto"\xc6\x0e\n\x07\x43\x61talog\x12K\n\x10\x63urrent_database\x18\x01 \x01(\x0b\x32\x1e.spark.connect.CurrentDatabaseH\x00R\x0f\x63urrentDatabase\x12U\n\x14set_current_database\x18\x02 \x01(\x0b\x32!.spark.connect.SetCurrentDatabaseH\x00R\x12setCurrentDatabase\x12\x45\n\x0elist_databases\x18\x03 \x01(\x0b\x32\x1c.spark.connect.ListDatabasesH\x00R\rlistDatabases\x12<\n [...]
+    b'\n\x1bspark/connect/catalog.proto\x12\rspark.connect\x1a\x1aspark/connect/common.proto\x1a\x19spark/connect/types.proto"\xc6\x0e\n\x07\x43\x61talog\x12K\n\x10\x63urrent_database\x18\x01 \x01(\x0b\x32\x1e.spark.connect.CurrentDatabaseH\x00R\x0f\x63urrentDatabase\x12U\n\x14set_current_database\x18\x02 \x01(\x0b\x32!.spark.connect.SetCurrentDatabaseH\x00R\x12setCurrentDatabase\x12\x45\n\x0elist_databases\x18\x03 \x01(\x0b\x32\x1c.spark.connect.ListDatabasesH\x00R\rlistDatabases\x12<\n [...]
 )
@@ -401,55 +401,55 @@ if _descriptor._USE_C_DESCRIPTORS == False:
     _SETCURRENTDATABASE._serialized_start = 1985
     _SETCURRENTDATABASE._serialized_end = 2030
     _LISTDATABASES._serialized_start = 2032
-    _LISTDATABASES._serialized_end = 2047
-    _LISTTABLES._serialized_start = 2049
-    _LISTTABLES._serialized_end = 2103
-    _LISTFUNCTIONS._serialized_start = 2105
-    _LISTFUNCTIONS._serialized_end = 2162
-    _LISTCOLUMNS._serialized_start = 2164
-    _LISTCOLUMNS._serialized_end = 2250
-    _GETDATABASE._serialized_start = 2252
-    _GETDATABASE._serialized_end = 2290
-    _GETTABLE._serialized_start = 2292
-    _GETTABLE._serialized_end = 2375
-    _GETFUNCTION._serialized_start = 2377
-    _GETFUNCTION._serialized_end = 2469
-    _DATABASEEXISTS._serialized_start = 2471
-    _DATABASEEXISTS._serialized_end = 2512
-    _TABLEEXISTS._serialized_start = 2514
-    _TABLEEXISTS._serialized_end = 2600
-    _FUNCTIONEXISTS._serialized_start = 2602
-    _FUNCTIONEXISTS._serialized_end = 2697
-    _CREATEEXTERNALTABLE._serialized_start = 2700
-    _CREATEEXTERNALTABLE._serialized_end = 3026
-    _CREATEEXTERNALTABLE_OPTIONSENTRY._serialized_start = 2937
-    _CREATEEXTERNALTABLE_OPTIONSENTRY._serialized_end = 2995
-    _CREATETABLE._serialized_start = 3029
-    _CREATETABLE._serialized_end = 3394
-    _CREATETABLE_OPTIONSENTRY._serialized_start = 2937
-    _CREATETABLE_OPTIONSENTRY._serialized_end = 2995
-    _DROPTEMPVIEW._serialized_start = 3396
-    _DROPTEMPVIEW._serialized_end = 3439
-    _DROPGLOBALTEMPVIEW._serialized_start = 3441
-    _DROPGLOBALTEMPVIEW._serialized_end = 3490
-    _RECOVERPARTITIONS._serialized_start = 3492
-    _RECOVERPARTITIONS._serialized_end = 3542
-    _ISCACHED._serialized_start = 3544
-    _ISCACHED._serialized_end = 3585
-    _CACHETABLE._serialized_start = 3588
-    _CACHETABLE._serialized_end = 3720
-    _UNCACHETABLE._serialized_start = 3722
-    _UNCACHETABLE._serialized_end = 3767
-    _CLEARCACHE._serialized_start = 3769
-    _CLEARCACHE._serialized_end = 3781
-    _REFRESHTABLE._serialized_start = 3783
-    _REFRESHTABLE._serialized_end = 3828
-    _REFRESHBYPATH._serialized_start = 3830
-    _REFRESHBYPATH._serialized_end = 3865
-    _CURRENTCATALOG._serialized_start = 3867
-    _CURRENTCATALOG._serialized_end = 3883
-    _SETCURRENTCATALOG._serialized_start = 3885
-    _SETCURRENTCATALOG._serialized_end = 3939
-    _LISTCATALOGS._serialized_start = 3941
-    _LISTCATALOGS._serialized_end = 3998
+    _LISTDATABASES._serialized_end = 2090
+    _LISTTABLES._serialized_start = 2092
+    _LISTTABLES._serialized_end = 2146
+    _LISTFUNCTIONS._serialized_start = 2148
+    _LISTFUNCTIONS._serialized_end = 2205
+    _LISTCOLUMNS._serialized_start = 2207
+    _LISTCOLUMNS._serialized_end = 2293
+    _GETDATABASE._serialized_start = 2295
+    _GETDATABASE._serialized_end = 2333
+    _GETTABLE._serialized_start = 2335
+    _GETTABLE._serialized_end = 2418
+    _GETFUNCTION._serialized_start = 2420
+    _GETFUNCTION._serialized_end = 2512
+    _DATABASEEXISTS._serialized_start = 2514
+    _DATABASEEXISTS._serialized_end = 2555
+    _TABLEEXISTS._serialized_start = 2557
+    _TABLEEXISTS._serialized_end = 2643
+    _FUNCTIONEXISTS._serialized_start = 2645
+    _FUNCTIONEXISTS._serialized_end = 2740
+    _CREATEEXTERNALTABLE._serialized_start = 2743
+    _CREATEEXTERNALTABLE._serialized_end = 3069
+    _CREATEEXTERNALTABLE_OPTIONSENTRY._serialized_start = 2980
+    _CREATEEXTERNALTABLE_OPTIONSENTRY._serialized_end = 3038
+    _CREATETABLE._serialized_start = 3072
+    _CREATETABLE._serialized_end = 3437
+    _CREATETABLE_OPTIONSENTRY._serialized_start = 2980
+    _CREATETABLE_OPTIONSENTRY._serialized_end = 3038
+    _DROPTEMPVIEW._serialized_start = 3439
+    _DROPTEMPVIEW._serialized_end = 3482
+    _DROPGLOBALTEMPVIEW._serialized_start = 3484
+    _DROPGLOBALTEMPVIEW._serialized_end = 3533
+    _RECOVERPARTITIONS._serialized_start = 3535
+    _RECOVERPARTITIONS._serialized_end = 3585
+    _ISCACHED._serialized_start = 3587
+    _ISCACHED._serialized_end = 3628
+    _CACHETABLE._serialized_start = 3631
+    _CACHETABLE._serialized_end = 3763
+    _UNCACHETABLE._serialized_start = 3765
+    _UNCACHETABLE._serialized_end = 3810
+    _CLEARCACHE._serialized_start = 3812
+    _CLEARCACHE._serialized_end = 3824
+    _REFRESHTABLE._serialized_start = 3826
+    _REFRESHTABLE._serialized_end = 3871
+    _REFRESHBYPATH._serialized_start = 3873
+    _REFRESHBYPATH._serialized_end = 3908
+    _CURRENTCATALOG._serialized_start = 3910
+    _CURRENTCATALOG._serialized_end = 3926
+    _SETCURRENTCATALOG._serialized_start = 3928
+    _SETCURRENTCATALOG._serialized_end = 3982
+    _LISTCATALOGS._serialized_start = 3984
+    _LISTCATALOGS._serialized_end = 4041
 # @@protoc_insertion_point(module_scope)
diff --git a/python/pyspark/sql/connect/proto/catalog_pb2.pyi b/python/pyspark/sql/connect/proto/catalog_pb2.pyi
index 3246f4926a5..fd58ca543ae 100644
--- a/python/pyspark/sql/connect/proto/catalog_pb2.pyi
+++ b/python/pyspark/sql/connect/proto/catalog_pb2.pyi
@@ -347,9 +347,23 @@ class ListDatabases(google.protobuf.message.Message):
 
     DESCRIPTOR: google.protobuf.descriptor.Descriptor
 
+    PATTERN_FIELD_NUMBER: builtins.int
+    pattern: builtins.str
+    """(Optional) The pattern that the database name needs to match"""
     def __init__(
         self,
+        *,
+        pattern: builtins.str | None = ...,
     ) -> None: ...
+    def HasField(
+        self, field_name: typing_extensions.Literal["_pattern", b"_pattern", "pattern", b"pattern"]
+    ) -> builtins.bool: ...
+    def ClearField(
+        self, field_name: typing_extensions.Literal["_pattern", b"_pattern", "pattern", b"pattern"]
+    ) -> None: ...
+    def WhichOneof(
+        self, oneof_group: typing_extensions.Literal["_pattern", b"_pattern"]
+    ) -> typing_extensions.Literal["pattern"] | None: ...
 
 global___ListDatabases = ListDatabases
diff --git a/python/pyspark/sql/tests/test_catalog.py b/python/pyspark/sql/tests/test_catalog.py
index 11fea788ff7..93390aa0881 100644
--- a/python/pyspark/sql/tests/test_catalog.py
+++ b/python/pyspark/sql/tests/test_catalog.py
@@ -42,6 +42,10 @@ class CatalogTestsMixin:
             spark.sql("CREATE DATABASE some_db")
             databases = [db.name for db in spark.catalog.listDatabases()]
             self.assertEqual(sorted(databases), ["default", "some_db"])
+            databases = [db.name for db in spark.catalog.listDatabases("def*")]
+            self.assertEqual(sorted(databases), ["default"])
+            databases = [db.name for db in spark.catalog.listDatabases("def2*")]
+            self.assertEqual(sorted(databases), [])
 
     def test_database_exists(self):
         # SPARK-36207: testing that database_exists returns correct boolean
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala
index 7f70a413b86..c2cdd2382c4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala
@@ -53,6 +53,14 @@ abstract class Catalog {
    */
  def listDatabases(): Dataset[Database]
 
+  /**
+   * Returns a list of databases (namespaces) whose name matches the specified pattern and
+   * that are available within the current catalog.
+   *
+   * @since 3.5.0
+   */
+  def listDatabases(pattern: String): Dataset[Database]
+
   /**
    * Returns a list of tables/views in the current database (namespace).
    * This includes all temporary views.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
index 3c5e12df9b4..f8da89eea0a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
@@ -90,6 +90,24 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
     CatalogImpl.makeDataset(databases, sparkSession)
   }
 
+  /**
+   * Returns a list of databases (namespaces) whose name matches the specified pattern and
+   * that are available within the current catalog.
+   *
+   * @since 3.5.0
+   */
+  override def listDatabases(pattern: String): Dataset[Database] = {
+    val plan = ShowNamespaces(UnresolvedNamespace(Nil), Some(pattern))
+    val qe = sparkSession.sessionState.executePlan(plan)
+    val catalog = qe.analyzed.collectFirst {
+      case ShowNamespaces(r: ResolvedNamespace, _, _) => r.catalog
+    }.get
+    val databases = qe.toRdd.collect().map { row =>
+      getNamespace(catalog, parseIdent(row.getString(0)))
+    }
+    CatalogImpl.makeDataset(databases, sparkSession)
+  }
+
   /**
    * Returns a list of tables in the current database.
    * This includes all temporary tables.
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala
index cdf55122dc5..6fa7ad56b68 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala
@@ -156,6 +156,10 @@ class CatalogSuite extends SharedSparkSession with AnalysisTest with BeforeAndAf
     createDatabase("my_db2")
     assert(spark.catalog.listDatabases().collect().map(_.name).toSet ==
       Set("default", "my_db1", "my_db2"))
+    assert(spark.catalog.listDatabases("my*").collect().map(_.name).toSet ==
+      Set("my_db1", "my_db2"))
+    assert(spark.catalog.listDatabases("you*").collect().map(_.name).toSet ==
+      Set.empty[String])
     dropDatabase("my_db1")
     assert(spark.catalog.listDatabases().collect().map(_.name).toSet ==
       Set("default", "my_db2"))
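For readers who want to try the new API, here is a minimal usage sketch; it is not part of the commit. It assumes Spark 3.5.0 or later with a local `SparkSession`, and the database names are illustrative only. The pattern follows `SHOW DATABASES LIKE` semantics (the implementation delegates to a `ShowNamespaces` plan), so `*` matches any sequence of characters.

```scala
import org.apache.spark.sql.SparkSession

// Minimal sketch of Catalog.listDatabases with the new optional pattern.
// Assumes a local SparkSession; database names are illustrative only.
object ListDatabasesPatternExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("listDatabases-pattern")
      .getOrCreate()

    spark.sql("CREATE DATABASE IF NOT EXISTS my_db1")
    spark.sql("CREATE DATABASE IF NOT EXISTS my_db2")

    // Existing overload: every database in the current catalog.
    spark.catalog.listDatabases().show(truncate = false)

    // New overload: filter with a SHOW DATABASES-style pattern.
    spark.catalog.listDatabases("my*").show(truncate = false)  // my_db1, my_db2
    spark.catalog.listDatabases("you*").show(truncate = false) // empty result

    spark.stop()
  }
}
```

Because the server-side overload reuses the `ShowNamespaces` resolution path, the same pattern string returns the same databases whether it is issued through the classic Catalog API, PySpark, or a Spark Connect client.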