This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push: new c29cf34bfc6 [SPARK-42823][SQL] `spark-sql` shell supports multipart namespaces for initialization c29cf34bfc6 is described below commit c29cf34bfc694cd3d959c82a25adf251975f0817 Author: Kent Yao <y...@apache.org> AuthorDate: Thu Mar 16 20:29:16 2023 -0700 [SPARK-42823][SQL] `spark-sql` shell supports multipart namespaces for initialization ### What changes were proposed in this pull request? Currently, we only support initializing spark-sql shell with a single-part schema, which also must be forced to the session catalog. #### case 1, specifying catalog field for v1sessioncatalog ```sql bin/spark-sql --database spark_catalog.default Exception in thread "main" org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException: Database 'spark_catalog.default' not found ``` #### case 2, setting the default catalog to another one ```sql bin/spark-sql -c spark.sql.defaultCatalog=testcat -c spark.sql.catalog.testcat=org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog -c spark.sql.catalog.testcat.url='jdbc:derby:memory:testcat;create=true' -c spark.sql.catalog.testcat.driver=org.apache.derby.jdbc.AutoloadedDriver -c spark.sql.catalogImplementation=in-memory --database SYS 23/03/16 18:40:49 WARN ObjectStore: Failed to get database sys, returning NoSuchObjectException Exception in thread "main" org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException: Database 'sys' not found ``` In this PR, we switch to use-statement to support multipart namespaces, which helps us resolve the catalog correctly. ### Why are the changes needed? Make spark-sql shell better support the v2 catalog framework. ### Does this PR introduce _any_ user-facing change? Yes, `--database` option supports multipart namespaces and works for v2 catalogs now. And you will see this behavior on spark web ui. ### How was this patch tested? new ut Closes #40457 from yaooqinn/SPARK-42823. 
Authored-by: Kent Yao <y...@apache.org> Signed-off-by: Dongjoon Hyun <dongj...@apache.org> (cherry picked from commit 2000d5f8db838db62967a45d574728a8bf2aaf6b) Signed-off-by: Dongjoon Hyun <dongj...@apache.org> --- .../sql/hive/thriftserver/SparkSQLCLIDriver.scala | 15 ++++++------- .../spark/sql/hive/thriftserver/CliSuite.scala | 26 ++++++++++++++++++++++ 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index 51b314ad2c1..22df4e67440 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -201,14 +201,6 @@ private[hive] object SparkSQLCLIDriver extends Logging { case e: UnsupportedEncodingException => exit(ERROR_PATH_NOT_FOUND) } - if (sessionState.database != null) { - SparkSQLEnv.sqlContext.sessionState.catalog.setCurrentDatabase( - s"${sessionState.database}") - } - - // Execute -i init files (always in silent mode) - cli.processInitFiles(sessionState) - // We don't propagate hive.metastore.warehouse.dir, because it might has been adjusted in // [[SharedState.loadHiveConfFile]] based on the user specified or default values of // spark.sql.warehouse.dir and hive.metastore.warehouse.dir. 
@@ -216,6 +208,13 @@ private[hive] object SparkSQLCLIDriver extends Logging { SparkSQLEnv.sqlContext.setConf(k, v) } + if (sessionState.database != null) { + SparkSQLEnv.sqlContext.sql(s"USE ${sessionState.database}") + } + + // Execute -i init files (always in silent mode) + cli.processInitFiles(sessionState) + cli.printMasterAndAppId if (sessionState.execString != null) { diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 5413635ba47..651c6b7aafb 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -35,6 +35,7 @@ import org.apache.hadoop.hive.ql.session.SessionState import org.apache.spark.{ErrorMessageFormat, SparkConf, SparkContext, SparkFunSuite} import org.apache.spark.ProcessTestUtils.ProcessOutputCapturer import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.HiveUtils._ import org.apache.spark.sql.hive.client.HiveClientImpl @@ -806,4 +807,29 @@ class CliSuite extends SparkFunSuite { prompt = "spark-sql (spark_42448)>")( "select current_database();" -> "spark_42448") } + + test("SPARK-42823: multipart identifier support for specify database by --database option") { + val catalogName = "testcat" + val catalogImpl = s"spark.sql.catalog.$catalogName=${classOf[JDBCTableCatalog].getName}" + val catalogUrl = + s"spark.sql.catalog.$catalogName.url=jdbc:derby:memory:$catalogName;create=true" + val catalogDriver = + s"spark.sql.catalog.$catalogName.driver=org.apache.derby.jdbc.AutoloadedDriver" + val database = s"-database $catalogName.SYS" + val catalogConfigs = + Seq(catalogImpl, catalogDriver, catalogUrl, 
"spark.sql.catalogImplementation=in-memory") + .flatMap(Seq("--conf", _)) + runCliWithin( + 2.minute, + catalogConfigs ++ Seq("--database", s"$catalogName.SYS"))( + "SELECT CURRENT_CATALOG();" -> catalogName, + "SELECT CURRENT_SCHEMA();" -> "SYS") + + runCliWithin( + 2.minute, + catalogConfigs ++ + Seq("--conf", s"spark.sql.defaultCatalog=$catalogName", "--database", "SYS"))( + "SELECT CURRENT_CATALOG();" -> catalogName, + "SELECT CURRENT_SCHEMA();" -> "SYS") + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org