This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new caab724 [SPARK-34359][SQL][3.1] Add a legacy config to restore the output schema of SHOW DATABASES caab724 is described below commit caab724801e75259e043a79bf996b6d9e0ac2a7b Author: Wenchen Fan <cloud0...@gmail.com> AuthorDate: Fri Feb 5 20:43:48 2021 +0800 [SPARK-34359][SQL][3.1] Add a legacy config to restore the output schema of SHOW DATABASES This backports https://github.com/apache/spark/pull/31474 to 3.1/3.0 This is a followup of https://github.com/apache/spark/pull/26006 In #26006, we merged the v1 and v2 SHOW DATABASES/NAMESPACES commands, but we missed a behavior change that the output schema of SHOW DATABASES becomes different. This PR adds a legacy config to restore the old schema, with a migration guide item to mention this behavior change. Why are the changes needed: to improve backward compatibility. Does this introduce any user-facing change: no (the legacy config is false by default). How was this patch tested: a new test. Closes #31486 from cloud-fan/command-schema. Authored-by: Wenchen Fan <cloud0...@gmail.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> (cherry picked from commit 7c87b48029e12ed0ce0b1b37f436ffb3d85ee83c) Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- docs/sql-migration-guide.md | 2 ++ .../org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala | 9 ++++++--- .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 9 +++++++++ .../spark/sql/catalyst/analysis/ResolveSessionCatalog.scala | 8 ++++++++ .../sql/execution/datasources/v2/DataSourceV2Strategy.scala | 4 ++-- .../scala/org/apache/spark/sql/execution/command/DDLSuite.scala | 6 ++++++ 6 files changed, 33 insertions(+), 5 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index c9b02da..742a05b 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -32,6 +32,8 @@ license: | - In Spark 3.0.2, `PARTITION(col=null)` is always parsed as a null literal in the partition spec. 
In Spark 3.0.1 or earlier, it is parsed as a string literal of its text representation, e.g., string "null", if the partition column is string type. To restore the legacy behavior, you can set `spark.sql.legacy.parseNullPartitionSpecAsStringLiteral` as true. + - In Spark 3.0.0, the output schema of `SHOW DATABASES` becomes `namespace: string`. In Spark version 2.4 and earlier, the schema was `databaseName: string`. Since Spark 3.0.2, you can restore the old schema by setting `spark.sql.legacy.keepCommandOutputSchema` to `true`. + ## Upgrading from Spark SQL 3.0 to 3.0.1 - In Spark 3.0, JSON datasource and JSON function `schema_of_json` infer TimestampType from string values if they match to the pattern defined by the JSON option `timestampFormat`. Since version 3.0.1, the timestamp type inference is disabled by default. Set the JSON option `inferTimestamp` to `true` to enable such type inference. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 9077f7a..3f4a893 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -300,11 +300,14 @@ case class AlterNamespaceSetLocation( */ case class ShowNamespaces( namespace: LogicalPlan, - pattern: Option[String]) extends Command { + pattern: Option[String], + override val output: Seq[Attribute] = ShowNamespaces.OUTPUT) extends Command { override def children: Seq[LogicalPlan] = Seq(namespace) + override def producedAttributes: AttributeSet = outputSet +} - override val output: Seq[Attribute] = Seq( - AttributeReference("namespace", StringType, nullable = false)()) +object ShowNamespaces { + val OUTPUT = Seq(AttributeReference("namespace", StringType, nullable = false)()) } /** diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index f4c36dd..f55546f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2611,6 +2611,15 @@ object SQLConf { .checkValue(_ > 0, "The timeout value must be positive") .createWithDefault(10L) + val LEGACY_KEEP_COMMAND_OUTPUT_SCHEMA = + buildConf("spark.sql.legacy.keepCommandOutputSchema") + .internal() + .doc("When true, Spark will keep the output schema of commands such as SHOW DATABASES " + + "unchanged, for v1 catalog and/or table.") + .version("3.0.2") + .booleanConf + .createWithDefault(false) + /** * Holds information about keys that have been deprecated. * diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 9a8d2f0..007193f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -225,6 +225,14 @@ class ResolveSessionCatalog( } AlterDatabaseSetLocationCommand(ns.head, location) + case s @ ShowNamespaces(ResolvedNamespace(cata, _), _, output) if isSessionCatalog(cata) => + if (conf.getConf(SQLConf.LEGACY_KEEP_COMMAND_OUTPUT_SCHEMA)) { + assert(output.length == 1) + s.copy(output = Seq(output.head.withName("databaseName"))) + } else { + s + } + // v1 RENAME TABLE supports temp view. 
case RenameTableStatement(TempViewOrV1Table(oldName), newName, isView) => AlterTableRenameCommand(oldName.asTableIdentifier, newName.asTableIdentifier, isView) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 877aea1..4624e98 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -273,8 +273,8 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case DropNamespace(ResolvedNamespace(catalog, ns), ifExists, cascade) => DropNamespaceExec(catalog, ns, ifExists, cascade) :: Nil - case r @ ShowNamespaces(ResolvedNamespace(catalog, ns), pattern) => - ShowNamespacesExec(r.output, catalog.asNamespaceCatalog, ns, pattern) :: Nil + case ShowNamespaces(ResolvedNamespace(catalog, ns), pattern, output) => + ShowNamespacesExec(output, catalog.asNamespaceCatalog, ns, pattern) :: Nil case r @ ShowTables(ResolvedNamespace(catalog, ns), pattern) => ShowTablesExec(r.output, catalog.asTableCatalog, ns, pattern) :: Nil diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 847bc66..21d7acf 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -1325,6 +1325,12 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { Nil) } + test("SPARK-34359: keep the legacy output schema") { + withSQLConf(SQLConf.LEGACY_KEEP_COMMAND_OUTPUT_SCHEMA.key -> "true") { + assert(sql("SHOW NAMESPACES").schema.fieldNames.toSeq == Seq("databaseName")) + } + } + test("drop view - temporary view") { val catalog = 
spark.sessionState.catalog sql( --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org