This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.3 by this push:
     new 8608baad7ab [SPARK-37878][SQL][FOLLOWUP] V1Table should always carry 
the "location" property
8608baad7ab is described below

commit 8608baad7ab31eef0903b9229789e8112c9c1234
Author: Wenchen Fan <wenc...@databricks.com>
AuthorDate: Wed May 11 14:49:54 2022 +0800

    [SPARK-37878][SQL][FOLLOWUP] V1Table should always carry the "location" 
property
    
    ### What changes were proposed in this pull request?
    
    This is a followup of https://github.com/apache/spark/pull/35204 . 
https://github.com/apache/spark/pull/35204 introduced a potential regression: 
it removes the "location" table property from `V1Table` if the table is not 
external. The intention was to avoid putting the LOCATION clause for managed 
tables in `ShowCreateTableExec`. However, if we use the v2 DESCRIBE TABLE 
command by default in the future, this will bring a behavior change and v2 
DESCRIBE TABLE command won't print the tab [...]
    
    This PR fixes this regression by using a different idea to fix the SHOW 
CREATE TABLE issue:
    1. introduce a new reserved table property `is_managed_location`, to 
indicate that the location is managed by the catalog rather than user-given.
    2. `ShowCreateTableExec` only generates the LOCATION clause if the 
"location" property is present and the location is not managed.
    
    ### Why are the changes needed?
    
    avoid a potential regression
    
    ### Does this PR introduce _any_ user-facing change?
    
    no
    
    ### How was this patch tested?
    
    existing tests. We can add a test when we use v2 DESCRIBE TABLE command by 
default.
    
    Closes #36498 from cloud-fan/regression.
    
    Authored-by: Wenchen Fan <wenc...@databricks.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
    (cherry picked from commit fa2bda5c4eabb23d5f5b3e14ccd055a2453f579f)
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../org/apache/spark/sql/connector/catalog/TableCatalog.java |  6 ++++++
 .../org/apache/spark/sql/catalyst/parser/AstBuilder.scala    | 12 ++++++++++--
 .../apache/spark/sql/connector/catalog/CatalogV2Util.scala   |  3 ++-
 .../org/apache/spark/sql/connector/catalog/V1Table.scala     |  6 +++---
 .../sql/execution/datasources/v2/ShowCreateTableExec.scala   | 10 +++++++---
 5 files changed, 28 insertions(+), 9 deletions(-)

diff --git 
a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java
 
b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java
index 9336c2a1cae..ec773ab90ad 100644
--- 
a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java
+++ 
b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java
@@ -47,6 +47,12 @@ public interface TableCatalog extends CatalogPlugin {
    */
   String PROP_LOCATION = "location";
 
+  /**
+   * A reserved property to indicate that the table location is managed, not 
user-specified.
+   * If this property is "true", SHOW CREATE TABLE will not generate the 
LOCATION clause.
+   */
+  String PROP_IS_MANAGED_LOCATION = "is_managed_location";
+
   /**
    * A reserved property to specify a table was created with EXTERNAL.
    */
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 19544346447..ecc5360a4f7 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -41,7 +41,7 @@ import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.trees.CurrentOrigin
 import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, DateTimeUtils, 
IntervalUtils}
 import org.apache.spark.sql.catalyst.util.DateTimeUtils.{convertSpecialDate, 
convertSpecialTimestamp, convertSpecialTimestampNTZ, getZoneId, stringToDate, 
stringToTimestamp, stringToTimestampWithoutTimeZone}
-import org.apache.spark.sql.connector.catalog.{SupportsNamespaces, 
TableCatalog}
+import org.apache.spark.sql.connector.catalog.{CatalogV2Util, 
SupportsNamespaces, TableCatalog}
 import org.apache.spark.sql.connector.catalog.TableChange.ColumnPosition
 import org.apache.spark.sql.connector.expressions.{ApplyTransform, 
BucketTransform, DaysTransform, Expression => V2Expression, FieldReference, 
HoursTransform, IdentityTransform, LiteralValue, MonthsTransform, Transform, 
YearsTransform}
 import org.apache.spark.sql.errors.QueryParsingErrors
@@ -3215,7 +3215,15 @@ class AstBuilder extends 
SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit
         throw QueryParsingErrors.cannotCleanReservedTablePropertyError(
           PROP_EXTERNAL, ctx, "please use CREATE EXTERNAL TABLE")
       case (PROP_EXTERNAL, _) => false
-      case _ => true
+      // It's safe to set whatever table comment, so we don't make it a 
reserved table property.
+      case (PROP_COMMENT, _) => true
+      case (k, _) =>
+        val isReserved = CatalogV2Util.TABLE_RESERVED_PROPERTIES.contains(k)
+        if (!legacyOn && isReserved) {
+          throw QueryParsingErrors.cannotCleanReservedTablePropertyError(
+            k, ctx, "please remove it from the TBLPROPERTIES list.")
+        }
+        !isReserved
     }
   }
 
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala
index 2fc13510c54..4c174ad7c4f 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala
@@ -48,7 +48,8 @@ private[sql] object CatalogV2Util {
       TableCatalog.PROP_LOCATION,
       TableCatalog.PROP_PROVIDER,
       TableCatalog.PROP_OWNER,
-      TableCatalog.PROP_EXTERNAL)
+      TableCatalog.PROP_EXTERNAL,
+      TableCatalog.PROP_IS_MANAGED_LOCATION)
 
   /**
    * The list of reserved namespace properties, which can not be removed or 
changed directly by
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala
index bf92107f6ae..da201e81649 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala
@@ -74,15 +74,15 @@ private[sql] case class V1Table(v1Table: CatalogTable) 
extends Table {
 private[sql] object V1Table {
   def addV2TableProperties(v1Table: CatalogTable): Map[String, String] = {
     val external = v1Table.tableType == CatalogTableType.EXTERNAL
+    val managed = v1Table.tableType == CatalogTableType.MANAGED
 
     v1Table.properties ++
       v1Table.storage.properties.map { case (key, value) =>
         TableCatalog.OPTION_PREFIX + key -> value } ++
       v1Table.provider.map(TableCatalog.PROP_PROVIDER -> _) ++
       v1Table.comment.map(TableCatalog.PROP_COMMENT -> _) ++
-      (if (external) {
-        v1Table.storage.locationUri.map(TableCatalog.PROP_LOCATION -> 
_.toString)
-      } else None) ++
+      v1Table.storage.locationUri.map(TableCatalog.PROP_LOCATION -> 
_.toString) ++
+      (if (managed) Some(TableCatalog.PROP_IS_MANAGED_LOCATION -> "true") else 
None) ++
       (if (external) Some(TableCatalog.PROP_EXTERNAL -> "true") else None) ++
       Some(TableCatalog.PROP_OWNER -> v1Table.owner)
   }
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala
index 06f5a08ffd9..2ad24b845c2 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala
@@ -114,9 +114,13 @@ case class ShowCreateTableExec(
   }
 
   private def showTableLocation(table: Table, builder: StringBuilder): Unit = {
-    Option(table.properties.get(TableCatalog.PROP_LOCATION))
-      .map("LOCATION '" + escapeSingleQuotedString(_) + "'\n")
-      .foreach(builder.append)
+    val isManagedOption = 
Option(table.properties.get(TableCatalog.PROP_IS_MANAGED_LOCATION))
+    // Only generate LOCATION clause if it's not managed.
+    if (isManagedOption.forall(_.equalsIgnoreCase("false"))) {
+      Option(table.properties.get(TableCatalog.PROP_LOCATION))
+        .map("LOCATION '" + escapeSingleQuotedString(_) + "'\n")
+        .foreach(builder.append)
+    }
   }
 
   private def showTableProperties(


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to