This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new 7c51618cc26 [SPARK-43881][SQL][PYTHON][CONNECT] Add optional pattern for Catalog.listDatabases
7c51618cc26 is described below

commit 7c51618cc2627fee8e6c2983319dc5ab6060d33f
Author: Jiaan Geng <belie...@163.com>
AuthorDate: Mon Jun 5 08:32:28 2023 +0900

    [SPARK-43881][SQL][PYTHON][CONNECT] Add optional pattern for Catalog.listDatabases

    ### What changes were proposed in this pull request?
    Currently, the SQL syntax `SHOW [NAMESPACES | DATABASES | SCHEMAS] LIKE pattern` supports an
    optional pattern that filters the returned databases, but `Catalog.listDatabases` lacks this
    capability in both the Catalog API and the Connect Catalog API. This PR adds an optional
    pattern to `Catalog.listDatabases`.

    ### Why are the changes needed?
    The optional pattern is very useful for filtering databases by name.

    ### Does this PR introduce _any_ user-facing change?
    'No'.
    New feature.

    ### How was this patch tested?
    New test cases.

    Closes #41421 from beliefer/SPARK-43881.

    Authored-by: Jiaan Geng <belie...@163.com>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 .../org/apache/spark/sql/catalog/Catalog.scala     |   8 ++
 .../apache/spark/sql/internal/CatalogImpl.scala    |  12 +++
 .../scala/org/apache/spark/sql/CatalogSuite.scala  |   5 +
 .../src/main/protobuf/spark/connect/catalog.proto  |   5 +-
 .../sql/connect/planner/SparkConnectPlanner.scala  |   6 +-
 project/MimaExcludes.scala                         |   4 +-
 python/pyspark/sql/catalog.py                      |  21 ++++-
 python/pyspark/sql/connect/catalog.py              |   4 +-
 python/pyspark/sql/connect/plan.py                 |   8 +-
 python/pyspark/sql/connect/proto/catalog_pb2.py    | 104 ++++++++++-----------
 python/pyspark/sql/connect/proto/catalog_pb2.pyi   |  14 +++
 python/pyspark/sql/tests/test_catalog.py           |   4 +
 .../org/apache/spark/sql/catalog/Catalog.scala     |   8 ++
 .../apache/spark/sql/internal/CatalogImpl.scala    |  18 ++++
 .../apache/spark/sql/internal/CatalogSuite.scala   |   4 +
 15 files changed, 164 insertions(+), 61 deletions(-)

diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala
index 62167b242de..363f895db20 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala
@@ -51,6 +51,14 @@ abstract class Catalog {
    */
   def listDatabases(): Dataset[Database]
 
+  /**
+   * Returns a list of databases (namespaces) whose name matches the specified pattern and
+   * that are available within the current catalog.
+   *
+   * @since 3.5.0
+   */
+  def listDatabases(pattern: String): Dataset[Database]
+
   /**
    * Returns a list of tables/views in the current database (namespace). This includes all
    * temporary views.
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
index f72a99f6675..c2ed7f4e19e 100644
--- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
@@ -66,6 +66,18 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
     }
   }
 
+  /**
+   * Returns a list of databases (namespaces) whose name matches the specified pattern and
+   * that are available within the current catalog.
+   *
+   * @since 3.5.0
+   */
+  override def listDatabases(pattern: String): Dataset[Database] = {
+    sparkSession.newDataset(CatalogImpl.databaseEncoder) { builder =>
+      builder.getCatalogBuilder.getListDatabasesBuilder.setPattern(pattern)
+    }
+  }
+
   /**
    * Returns a list of tables/views in the current database (namespace). This includes all
    * temporary views.
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/CatalogSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/CatalogSuite.scala
index 49741842377..396f7214c04 100644
--- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/CatalogSuite.scala
+++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/CatalogSuite.scala
@@ -39,6 +39,11 @@ class CatalogSuite extends RemoteSparkSession with SQLHelper {
     assert(dbs.length == 2)
     assert(dbs.map(_.name) sameElements Array(db, currentDb))
     assert(dbs.map(_.catalog).distinct sameElements Array("spark_catalog"))
+    var databasesWithPattern = spark.catalog.listDatabases("def*").collect().sortBy(_.name)
+    assert(databasesWithPattern.length == 1)
+    assert(databasesWithPattern.map(_.name) sameElements Array(currentDb))
+    databasesWithPattern = spark.catalog.listDatabases("def2*").collect().sortBy(_.name)
+    assert(databasesWithPattern.length == 0)
     val database = spark.catalog.getDatabase(db)
     assert(database.name == db)
     val message = intercept[StatusRuntimeException] {
diff --git a/connector/connect/common/src/main/protobuf/spark/connect/catalog.proto b/connector/connect/common/src/main/protobuf/spark/connect/catalog.proto
index f048dbc7f25..57d75ee4a42 100644
--- a/connector/connect/common/src/main/protobuf/spark/connect/catalog.proto
+++ b/connector/connect/common/src/main/protobuf/spark/connect/catalog.proto
@@ -68,7 +68,10 @@ message SetCurrentDatabase {
 }
 
 // See `spark.catalog.listDatabases`
-message ListDatabases { }
+message ListDatabases {
+  // (Optional) The pattern that the database name needs to match
+  optional string pattern = 1;
+}
 
 // See `spark.catalog.listTables`
 message ListTables {
diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
index 631a9eee5f2..f09a4a4895b 100644
--- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
+++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
@@ -2697,7 +2697,11 @@ class SparkConnectPlanner(val session: SparkSession) {
   }
 
   private def transformListDatabases(getListDatabases: proto.ListDatabases): LogicalPlan = {
-    session.catalog.listDatabases().logicalPlan
+    if (getListDatabases.hasPattern) {
+      session.catalog.listDatabases(getListDatabases.getPattern).logicalPlan
+    } else {
+      session.catalog.listDatabases().logicalPlan
+    }
   }
 
   private def transformListTables(getListTables: proto.ListTables): LogicalPlan = {
diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
index a902cddf7da..e0b0604d1c0 100644
--- a/project/MimaExcludes.scala
+++ b/project/MimaExcludes.scala
@@ -44,7 +44,9 @@ object MimaExcludes {
     ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.util.HadoopFSUtils$SerializableFileStatus"),
     ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.util.HadoopFSUtils$SerializableFileStatus$"),
     // [SPARK-43792][SQL][PYTHON][CONNECT] Add optional pattern for Catalog.listCatalogs
-    ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.catalog.Catalog.listCatalogs")
+    ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.catalog.Catalog.listCatalogs"),
+    // [SPARK-43881][SQL][PYTHON][CONNECT] Add optional pattern for Catalog.listDatabases
+    ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.catalog.Catalog.listDatabases")
   )
 
   // Default exclude rules
diff --git a/python/pyspark/sql/catalog.py b/python/pyspark/sql/catalog.py
index 83d5fbbeae7..c0df6f38dbf 100644
--- a/python/pyspark/sql/catalog.py
+++ b/python/pyspark/sql/catalog.py
@@ -190,12 +190,20 @@ class Catalog:
         """
         return self._jcatalog.setCurrentDatabase(dbName)
 
-    def listDatabases(self) -> List[Database]:
+    def listDatabases(self, pattern: Optional[str] = None) -> List[Database]:
         """
         Returns a list of databases available across all sessions.
 
         .. versionadded:: 2.0.0
 
+        Parameters
+        ----------
+        pattern : str
+            The pattern that the database name needs to match.
+
+            .. versionchanged:: 3.5.0
+                Added ``pattern`` argument.
+
         Returns
         -------
         list
@@ -205,8 +213,17 @@ class Catalog:
         --------
         >>> spark.catalog.listDatabases()
         [Database(name='default', catalog='spark_catalog', description='default database', ...
+
+        >>> spark.catalog.listDatabases("def*")
+        [Database(name='default', catalog='spark_catalog', description='default database', ...
+
+        >>> spark.catalog.listDatabases("def2*")
+        []
         """
-        iter = self._jcatalog.listDatabases().toLocalIterator()
+        if pattern is None:
+            iter = self._jcatalog.listDatabases().toLocalIterator()
+        else:
+            iter = self._jcatalog.listDatabases(pattern).toLocalIterator()
         databases = []
         while iter.hasNext():
             jdb = iter.next()
diff --git a/python/pyspark/sql/connect/catalog.py b/python/pyspark/sql/connect/catalog.py
index cdcd96890b3..790b194c3f8 100644
--- a/python/pyspark/sql/connect/catalog.py
+++ b/python/pyspark/sql/connect/catalog.py
@@ -82,8 +82,8 @@ class Catalog:
 
     setCurrentDatabase.__doc__ = PySparkCatalog.setCurrentDatabase.__doc__
 
-    def listDatabases(self) -> List[Database]:
-        pdf = self._execute_and_fetch(plan.ListDatabases())
+    def listDatabases(self, pattern: Optional[str] = None) -> List[Database]:
+        pdf = self._execute_and_fetch(plan.ListDatabases(pattern=pattern))
         return [
             Database(
                 name=row.iloc[0],
diff --git a/python/pyspark/sql/connect/plan.py b/python/pyspark/sql/connect/plan.py
index 2793ecb3272..79c070101b6 100644
--- a/python/pyspark/sql/connect/plan.py
+++ b/python/pyspark/sql/connect/plan.py
@@ -1636,11 +1636,15 @@ class SetCurrentDatabase(LogicalPlan):
 
 
 class ListDatabases(LogicalPlan):
-    def __init__(self) -> None:
+    def __init__(self, pattern: Optional[str] = None) -> None:
         super().__init__(None)
+        self._pattern = pattern
 
     def plan(self, session: "SparkConnectClient") -> proto.Relation:
-        return proto.Relation(catalog=proto.Catalog(list_databases=proto.ListDatabases()))
+        plan = proto.Relation(catalog=proto.Catalog(list_databases=proto.ListDatabases()))
+        if self._pattern is not None:
+            plan.catalog.list_databases.pattern = self._pattern
+        return plan
 
 
 class ListTables(LogicalPlan):
diff --git a/python/pyspark/sql/connect/proto/catalog_pb2.py b/python/pyspark/sql/connect/proto/catalog_pb2.py
index 76c29fedc0c..f82b360a4a7 100644
--- a/python/pyspark/sql/connect/proto/catalog_pb2.py
+++ b/python/pyspark/sql/connect/proto/catalog_pb2.py
@@ -34,7 +34,7 @@ from pyspark.sql.connect.proto import types_pb2 as spark_dot_connect_dot_types__
 
 
 DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
-    b'\n\x1bspark/connect/catalog.proto\x12\rspark.connect\x1a\x1aspark/connect/common.proto\x1a\x19spark/connect/types.proto"\xc6\x0e\n\x07\x43\x61talog\x12K\n\x10\x63urrent_database\x18\x01 \x01(\x0b\x32\x1e.spark.connect.CurrentDatabaseH\x00R\x0f\x63urrentDatabase\x12U\n\x14set_current_database\x18\x02 \x01(\x0b\x32!.spark.connect.SetCurrentDatabaseH\x00R\x12setCurrentDatabase\x12\x45\n\x0elist_databases\x18\x03 \x01(\x0b\x32\x1c.spark.connect.ListDatabasesH\x00R\rlistDatabases\x12<\n [...]
+    b'\n\x1bspark/connect/catalog.proto\x12\rspark.connect\x1a\x1aspark/connect/common.proto\x1a\x19spark/connect/types.proto"\xc6\x0e\n\x07\x43\x61talog\x12K\n\x10\x63urrent_database\x18\x01 \x01(\x0b\x32\x1e.spark.connect.CurrentDatabaseH\x00R\x0f\x63urrentDatabase\x12U\n\x14set_current_database\x18\x02 \x01(\x0b\x32!.spark.connect.SetCurrentDatabaseH\x00R\x12setCurrentDatabase\x12\x45\n\x0elist_databases\x18\x03 \x01(\x0b\x32\x1c.spark.connect.ListDatabasesH\x00R\rlistDatabases\x12<\n [...]
 )
@@ -401,55 +401,55 @@ if _descriptor._USE_C_DESCRIPTORS == False:
     _SETCURRENTDATABASE._serialized_start = 1985
     _SETCURRENTDATABASE._serialized_end = 2030
     _LISTDATABASES._serialized_start = 2032
-    _LISTDATABASES._serialized_end = 2047
-    _LISTTABLES._serialized_start = 2049
-    _LISTTABLES._serialized_end = 2103
-    _LISTFUNCTIONS._serialized_start = 2105
-    _LISTFUNCTIONS._serialized_end = 2162
-    _LISTCOLUMNS._serialized_start = 2164
-    _LISTCOLUMNS._serialized_end = 2250
-    _GETDATABASE._serialized_start = 2252
-    _GETDATABASE._serialized_end = 2290
-    _GETTABLE._serialized_start = 2292
-    _GETTABLE._serialized_end = 2375
-    _GETFUNCTION._serialized_start = 2377
-    _GETFUNCTION._serialized_end = 2469
-    _DATABASEEXISTS._serialized_start = 2471
-    _DATABASEEXISTS._serialized_end = 2512
-    _TABLEEXISTS._serialized_start = 2514
-    _TABLEEXISTS._serialized_end = 2600
-    _FUNCTIONEXISTS._serialized_start = 2602
-    _FUNCTIONEXISTS._serialized_end = 2697
-    _CREATEEXTERNALTABLE._serialized_start = 2700
-    _CREATEEXTERNALTABLE._serialized_end = 3026
-    _CREATEEXTERNALTABLE_OPTIONSENTRY._serialized_start = 2937
-    _CREATEEXTERNALTABLE_OPTIONSENTRY._serialized_end = 2995
-    _CREATETABLE._serialized_start = 3029
-    _CREATETABLE._serialized_end = 3394
-    _CREATETABLE_OPTIONSENTRY._serialized_start = 2937
-    _CREATETABLE_OPTIONSENTRY._serialized_end = 2995
-    _DROPTEMPVIEW._serialized_start = 3396
-    _DROPTEMPVIEW._serialized_end = 3439
-    _DROPGLOBALTEMPVIEW._serialized_start = 3441
-    _DROPGLOBALTEMPVIEW._serialized_end = 3490
-    _RECOVERPARTITIONS._serialized_start = 3492
-    _RECOVERPARTITIONS._serialized_end = 3542
-    _ISCACHED._serialized_start = 3544
-    _ISCACHED._serialized_end = 3585
-    _CACHETABLE._serialized_start = 3588
-    _CACHETABLE._serialized_end = 3720
-    _UNCACHETABLE._serialized_start = 3722
-    _UNCACHETABLE._serialized_end = 3767
-    _CLEARCACHE._serialized_start = 3769
-    _CLEARCACHE._serialized_end = 3781
-    _REFRESHTABLE._serialized_start = 3783
-    _REFRESHTABLE._serialized_end = 3828
-    _REFRESHBYPATH._serialized_start = 3830
-    _REFRESHBYPATH._serialized_end = 3865
-    _CURRENTCATALOG._serialized_start = 3867
-    _CURRENTCATALOG._serialized_end = 3883
-    _SETCURRENTCATALOG._serialized_start = 3885
-    _SETCURRENTCATALOG._serialized_end = 3939
-    _LISTCATALOGS._serialized_start = 3941
-    _LISTCATALOGS._serialized_end = 3998
+    _LISTDATABASES._serialized_end = 2090
+    _LISTTABLES._serialized_start = 2092
+    _LISTTABLES._serialized_end = 2146
+    _LISTFUNCTIONS._serialized_start = 2148
+    _LISTFUNCTIONS._serialized_end = 2205
+    _LISTCOLUMNS._serialized_start = 2207
+    _LISTCOLUMNS._serialized_end = 2293
+    _GETDATABASE._serialized_start = 2295
+    _GETDATABASE._serialized_end = 2333
+    _GETTABLE._serialized_start = 2335
+    _GETTABLE._serialized_end = 2418
+    _GETFUNCTION._serialized_start = 2420
+    _GETFUNCTION._serialized_end = 2512
+    _DATABASEEXISTS._serialized_start = 2514
+    _DATABASEEXISTS._serialized_end = 2555
+    _TABLEEXISTS._serialized_start = 2557
+    _TABLEEXISTS._serialized_end = 2643
+    _FUNCTIONEXISTS._serialized_start = 2645
+    _FUNCTIONEXISTS._serialized_end = 2740
+    _CREATEEXTERNALTABLE._serialized_start = 2743
+    _CREATEEXTERNALTABLE._serialized_end = 3069
+    _CREATEEXTERNALTABLE_OPTIONSENTRY._serialized_start = 2980
+    _CREATEEXTERNALTABLE_OPTIONSENTRY._serialized_end = 3038
+    _CREATETABLE._serialized_start = 3072
+    _CREATETABLE._serialized_end = 3437
+    _CREATETABLE_OPTIONSENTRY._serialized_start = 2980
+    _CREATETABLE_OPTIONSENTRY._serialized_end = 3038
+    _DROPTEMPVIEW._serialized_start = 3439
+    _DROPTEMPVIEW._serialized_end = 3482
+    _DROPGLOBALTEMPVIEW._serialized_start = 3484
+    _DROPGLOBALTEMPVIEW._serialized_end = 3533
+    _RECOVERPARTITIONS._serialized_start = 3535
+    _RECOVERPARTITIONS._serialized_end = 3585
+    _ISCACHED._serialized_start = 3587
+    _ISCACHED._serialized_end = 3628
+    _CACHETABLE._serialized_start = 3631
+    _CACHETABLE._serialized_end = 3763
+    _UNCACHETABLE._serialized_start = 3765
+    _UNCACHETABLE._serialized_end = 3810
+    _CLEARCACHE._serialized_start = 3812
+    _CLEARCACHE._serialized_end = 3824
+    _REFRESHTABLE._serialized_start = 3826
+    _REFRESHTABLE._serialized_end = 3871
+    _REFRESHBYPATH._serialized_start = 3873
+    _REFRESHBYPATH._serialized_end = 3908
+    _CURRENTCATALOG._serialized_start = 3910
+    _CURRENTCATALOG._serialized_end = 3926
+    _SETCURRENTCATALOG._serialized_start = 3928
+    _SETCURRENTCATALOG._serialized_end = 3982
+    _LISTCATALOGS._serialized_start = 3984
+    _LISTCATALOGS._serialized_end = 4041
 # @@protoc_insertion_point(module_scope)
diff --git a/python/pyspark/sql/connect/proto/catalog_pb2.pyi b/python/pyspark/sql/connect/proto/catalog_pb2.pyi
index 3246f4926a5..fd58ca543ae 100644
--- a/python/pyspark/sql/connect/proto/catalog_pb2.pyi
+++ b/python/pyspark/sql/connect/proto/catalog_pb2.pyi
@@ -347,9 +347,23 @@ class ListDatabases(google.protobuf.message.Message):
 
     DESCRIPTOR: google.protobuf.descriptor.Descriptor
 
+    PATTERN_FIELD_NUMBER: builtins.int
+    pattern: builtins.str
+    """(Optional) The pattern that the database name needs to match"""
     def __init__(
         self,
+        *,
+        pattern: builtins.str | None = ...,
     ) -> None: ...
+    def HasField(
+        self, field_name: typing_extensions.Literal["_pattern", b"_pattern", "pattern", b"pattern"]
+    ) -> builtins.bool: ...
+    def ClearField(
+        self, field_name: typing_extensions.Literal["_pattern", b"_pattern", "pattern", b"pattern"]
+    ) -> None: ...
+    def WhichOneof(
+        self, oneof_group: typing_extensions.Literal["_pattern", b"_pattern"]
+    ) -> typing_extensions.Literal["pattern"] | None: ...
 
 global___ListDatabases = ListDatabases
diff --git a/python/pyspark/sql/tests/test_catalog.py b/python/pyspark/sql/tests/test_catalog.py
index 11fea788ff7..93390aa0881 100644
--- a/python/pyspark/sql/tests/test_catalog.py
+++ b/python/pyspark/sql/tests/test_catalog.py
@@ -42,6 +42,10 @@ class CatalogTestsMixin:
             spark.sql("CREATE DATABASE some_db")
             databases = [db.name for db in spark.catalog.listDatabases()]
             self.assertEqual(sorted(databases), ["default", "some_db"])
+            databases = [db.name for db in spark.catalog.listDatabases("def*")]
+            self.assertEqual(sorted(databases), ["default"])
+            databases = [db.name for db in spark.catalog.listDatabases("def2*")]
+            self.assertEqual(sorted(databases), [])
 
     def test_database_exists(self):
         # SPARK-36207: testing that database_exists returns correct boolean
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala
index 7f70a413b86..c2cdd2382c4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala
@@ -53,6 +53,14 @@ abstract class Catalog {
    */
  def listDatabases(): Dataset[Database]
 
+  /**
+   * Returns a list of databases (namespaces) whose name matches the specified pattern and
+   * that are available within the current catalog.
+   *
+   * @since 3.5.0
+   */
+  def listDatabases(pattern: String): Dataset[Database]
+
   /**
    * Returns a list of tables/views in the current database (namespace).
    * This includes all temporary views.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
index 3c5e12df9b4..f8da89eea0a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
@@ -90,6 +90,24 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
     CatalogImpl.makeDataset(databases, sparkSession)
   }
 
+  /**
+   * Returns a list of databases (namespaces) whose name matches the specified pattern and
+   * that are available within the current catalog.
+   *
+   * @since 3.5.0
+   */
+  override def listDatabases(pattern: String): Dataset[Database] = {
+    val plan = ShowNamespaces(UnresolvedNamespace(Nil), Some(pattern))
+    val qe = sparkSession.sessionState.executePlan(plan)
+    val catalog = qe.analyzed.collectFirst {
+      case ShowNamespaces(r: ResolvedNamespace, _, _) => r.catalog
+    }.get
+    val databases = qe.toRdd.collect().map { row =>
+      getNamespace(catalog, parseIdent(row.getString(0)))
+    }
+    CatalogImpl.makeDataset(databases, sparkSession)
+  }
+
   /**
    * Returns a list of tables in the current database.
    * This includes all temporary tables.
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala
index cdf55122dc5..6fa7ad56b68 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala
@@ -156,6 +156,10 @@ class CatalogSuite extends SharedSparkSession with AnalysisTest with BeforeAndAf
     createDatabase("my_db2")
     assert(spark.catalog.listDatabases().collect().map(_.name).toSet ==
       Set("default", "my_db1", "my_db2"))
+    assert(spark.catalog.listDatabases("my*").collect().map(_.name).toSet ==
+      Set("my_db1", "my_db2"))
+    assert(spark.catalog.listDatabases("you*").collect().map(_.name).toSet ==
+      Set.empty[String])
     dropDatabase("my_db1")
     assert(spark.catalog.listDatabases().collect().map(_.name).toSet ==
       Set("default", "my_db2"))
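For readers who want to try the new API, here is a minimal usage sketch; it is not part of the commit. It assumes Spark 3.5.0 or later with a local `SparkSession`, and the database names are illustrative only. The pattern follows `SHOW DATABASES LIKE` semantics (the implementation delegates to a `ShowNamespaces` plan), so `*` matches any sequence of characters.

```scala
import org.apache.spark.sql.SparkSession

// Minimal sketch of Catalog.listDatabases with the new optional pattern.
// Assumes a local SparkSession; database names are illustrative only.
object ListDatabasesPatternExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("listDatabases-pattern")
      .getOrCreate()

    spark.sql("CREATE DATABASE IF NOT EXISTS my_db1")
    spark.sql("CREATE DATABASE IF NOT EXISTS my_db2")

    // Existing overload: every database in the current catalog.
    spark.catalog.listDatabases().show(truncate = false)

    // New overload: filter with a SHOW DATABASES-style pattern.
    spark.catalog.listDatabases("my*").show(truncate = false)  // my_db1, my_db2
    spark.catalog.listDatabases("you*").show(truncate = false) // empty result

    spark.stop()
  }
}
```

Because the server-side overload reuses the `ShowNamespaces` resolution path, the same pattern string returns the same databases whether it is issued through the classic Catalog API, PySpark, or a Spark Connect client.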