This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new fa2bda5c4ea [SPARK-37878][SQL][FOLLOWUP] V1Table should always carry the "location" property fa2bda5c4ea is described below commit fa2bda5c4eabb23d5f5b3e14ccd055a2453f579f Author: Wenchen Fan <wenc...@databricks.com> AuthorDate: Wed May 11 14:49:54 2022 +0800 [SPARK-37878][SQL][FOLLOWUP] V1Table should always carry the "location" property ### What changes were proposed in this pull request? This is a followup of https://github.com/apache/spark/pull/35204 . https://github.com/apache/spark/pull/35204 introduced a potential regression: it removes the "location" table property from `V1Table` if the table is not external. The intention was to avoid putting the LOCATION clause for managed tables in `ShowCreateTableExec`. However, if we use the v2 DESCRIBE TABLE command by default in the future, this will bring a behavior change and v2 DESCRIBE TABLE command won't print the table location for managed tables. This PR fixes this regression by using a different idea to fix the SHOW CREATE TABLE issue: 1. introduce a new reserved table property `is_managed_location`, to indicate that the location is managed by the catalog, not user given. 2. `ShowCreateTableExec` only generates the LOCATION clause if the "location" property is present and is not managed. ### Why are the changes needed? avoid a potential regression ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? existing tests. We can add a test when we use v2 DESCRIBE TABLE command by default. Closes #36498 from cloud-fan/regression. 
Authored-by: Wenchen Fan <wenc...@databricks.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../org/apache/spark/sql/connector/catalog/TableCatalog.java | 6 ++++++ .../org/apache/spark/sql/catalyst/parser/AstBuilder.scala | 12 ++++++++++-- .../apache/spark/sql/connector/catalog/CatalogV2Util.scala | 3 ++- .../org/apache/spark/sql/connector/catalog/V1Table.scala | 6 +++--- .../sql/execution/datasources/v2/ShowCreateTableExec.scala | 10 +++++++--- 5 files changed, 28 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java index 9336c2a1cae..ec773ab90ad 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java @@ -47,6 +47,12 @@ public interface TableCatalog extends CatalogPlugin { */ String PROP_LOCATION = "location"; + /** + * A reserved property to indicate that the table location is managed, not user-specified. + * If this property is "true", SHOW CREATE TABLE will not generate the LOCATION clause. + */ + String PROP_IS_MANAGED_LOCATION = "is_managed_location"; + /** * A reserved property to specify a table was created with EXTERNAL. 
*/ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 448ebcb35b3..e6f7dba863b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -42,7 +42,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.trees.CurrentOrigin import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, DateTimeUtils, IntervalUtils, ResolveDefaultColumns} import org.apache.spark.sql.catalyst.util.DateTimeUtils.{convertSpecialDate, convertSpecialTimestamp, convertSpecialTimestampNTZ, getZoneId, stringToDate, stringToTimestamp, stringToTimestampWithoutTimeZone} -import org.apache.spark.sql.connector.catalog.{SupportsNamespaces, TableCatalog} +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsNamespaces, TableCatalog} import org.apache.spark.sql.connector.catalog.TableChange.ColumnPosition import org.apache.spark.sql.connector.expressions.{ApplyTransform, BucketTransform, DaysTransform, Expression => V2Expression, FieldReference, HoursTransform, IdentityTransform, LiteralValue, MonthsTransform, Transform, YearsTransform} import org.apache.spark.sql.errors.QueryParsingErrors @@ -3288,7 +3288,15 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit throw QueryParsingErrors.cannotCleanReservedTablePropertyError( PROP_EXTERNAL, ctx, "please use CREATE EXTERNAL TABLE") case (PROP_EXTERNAL, _) => false - case _ => true + // It's safe to set whatever table comment, so we don't make it a reserved table property. 
+ case (PROP_COMMENT, _) => true + case (k, _) => + val isReserved = CatalogV2Util.TABLE_RESERVED_PROPERTIES.contains(k) + if (!legacyOn && isReserved) { + throw QueryParsingErrors.cannotCleanReservedTablePropertyError( + k, ctx, "please remove it from the TBLPROPERTIES list.") + } + !isReserved } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala index 2fc13510c54..4c174ad7c4f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala @@ -48,7 +48,8 @@ private[sql] object CatalogV2Util { TableCatalog.PROP_LOCATION, TableCatalog.PROP_PROVIDER, TableCatalog.PROP_OWNER, - TableCatalog.PROP_EXTERNAL) + TableCatalog.PROP_EXTERNAL, + TableCatalog.PROP_IS_MANAGED_LOCATION) /** * The list of reserved namespace properties, which can not be removed or changed directly by diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala index bf92107f6ae..da201e81649 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala @@ -74,15 +74,15 @@ private[sql] case class V1Table(v1Table: CatalogTable) extends Table { private[sql] object V1Table { def addV2TableProperties(v1Table: CatalogTable): Map[String, String] = { val external = v1Table.tableType == CatalogTableType.EXTERNAL + val managed = v1Table.tableType == CatalogTableType.MANAGED v1Table.properties ++ v1Table.storage.properties.map { case (key, value) => TableCatalog.OPTION_PREFIX + key -> value } ++ v1Table.provider.map(TableCatalog.PROP_PROVIDER -> _) ++ v1Table.comment.map(TableCatalog.PROP_COMMENT -> _) ++ - (if 
(external) { - v1Table.storage.locationUri.map(TableCatalog.PROP_LOCATION -> _.toString) - } else None) ++ + v1Table.storage.locationUri.map(TableCatalog.PROP_LOCATION -> _.toString) ++ + (if (managed) Some(TableCatalog.PROP_IS_MANAGED_LOCATION -> "true") else None) ++ (if (external) Some(TableCatalog.PROP_EXTERNAL -> "true") else None) ++ Some(TableCatalog.PROP_OWNER -> v1Table.owner) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala index 06f5a08ffd9..2ad24b845c2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala @@ -114,9 +114,13 @@ case class ShowCreateTableExec( } private def showTableLocation(table: Table, builder: StringBuilder): Unit = { - Option(table.properties.get(TableCatalog.PROP_LOCATION)) - .map("LOCATION '" + escapeSingleQuotedString(_) + "'\n") - .foreach(builder.append) + val isManagedOption = Option(table.properties.get(TableCatalog.PROP_IS_MANAGED_LOCATION)) + // Only generate LOCATION clause if it's not managed. + if (isManagedOption.forall(_.equalsIgnoreCase("false"))) { + Option(table.properties.get(TableCatalog.PROP_LOCATION)) + .map("LOCATION '" + escapeSingleQuotedString(_) + "'\n") + .foreach(builder.append) + } } private def showTableProperties( --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org