This is an automated email from the ASF dual-hosted git repository.

danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 91f341f8fa79 fix: filter EXTERNAL property in 
SparkCatalogMetaStoreClient.toCatalogTable (#18672)
91f341f8fa79 is described below

commit 91f341f8fa795879ccb32784ea7f12af0feab82d
Author: Prashant Wason <[email protected]>
AuthorDate: Mon May 4 19:06:26 2026 -0700

    fix: filter EXTERNAL property in SparkCatalogMetaStoreClient.toCatalogTable 
(#18672)
    
    Hudi's `HMSDDLExecutor.createTable` sets both `tableType=EXTERNAL_TABLE`
    and `parameters[EXTERNAL]=TRUE` on the Hive Table object when the table
    is external. When that Table flows through `SparkCatalogMetaStoreClient`
    into `HiveExternalCatalog`, `verifyTableProperties` rejects:
    
      AnalysisException: Cannot set or change the preserved property key:
      'EXTERNAL'
    
    Spark uses `CatalogTableType.EXTERNAL` on the `CatalogTable` itself to
    encode external-ness, and treats `EXTERNAL=...` as a duplicate (and
    forbidden) encoding. We already map `tableType` correctly via
    `if ("EXTERNAL_TABLE".equalsIgnoreCase(table.getTableType))`, so dropping
    the property in the same filter that already strips `spark.sql.*` is safe.
    
    Same family as #18654 (filter `spark.sql.*`).
    
    Adds a regression test mirroring the real `HMSDDLExecutor` shape:
    `tableType=EXTERNAL_TABLE` AND `parameters[EXTERNAL]=TRUE`.
    
    Co-authored-by: Claude Opus 4.7 <[email protected]>
---
 .../sql/hive/SparkCatalogMetaStoreClient.scala     |  9 +++++++-
 .../sql/hive/TestSparkCatalogMetaStoreClient.scala | 26 ++++++++++++++++++++++
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git 
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hive/SparkCatalogMetaStoreClient.scala
 
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hive/SparkCatalogMetaStoreClient.scala
index bd1133e5c31a..9fa225eeb876 100644
--- 
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hive/SparkCatalogMetaStoreClient.scala
+++ 
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hive/SparkCatalogMetaStoreClient.scala
@@ -311,8 +311,15 @@ class SparkCatalogMetaStoreClient(syncConfig: 
HiveSyncConfig)
     // table property keys may not start with 'spark.sql.'") because they are 
reserved for
     // Spark's internal use (provider, schema parts, create version). Spark 
re-derives and
     // writes these from the CatalogTable itself, so dropping them on the way 
in is safe.
+    //
+    // Also strip "EXTERNAL". HMSDDLExecutor.createTable sets both
+    // `tableType=EXTERNAL_TABLE` and `parameters[EXTERNAL]=TRUE`. Spark's
+    // HiveExternalCatalog.verifyTableProperties rejects "EXTERNAL" as a 
property key
+    // ("Cannot set or change the preserved property key: 'EXTERNAL'") because 
it controls
+    // table type via CatalogTableType instead. The tableType field below 
already encodes
+    // that information, so dropping the property is safe.
    val tableProperties = Option(table.getParameters).map(_.asScala.toMap).getOrElse(Map.empty)
-      .filterNot { case (k, _) => k.startsWith("spark.sql.") }
+      .filterNot { case (k, _) => k.startsWith("spark.sql.") || k == "EXTERNAL" }
 
     CatalogTable(
       identifier = TableIdentifier(tbl, Some(db)),
diff --git 
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hive/TestSparkCatalogMetaStoreClient.scala
 
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hive/TestSparkCatalogMetaStoreClient.scala
index e6385ee93528..93b2be295e3d 100644
--- 
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hive/TestSparkCatalogMetaStoreClient.scala
+++ 
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hive/TestSparkCatalogMetaStoreClient.scala
@@ -177,6 +177,32 @@ class TestSparkCatalogMetaStoreClient extends FunSuite 
with BeforeAndAfterAll {
     }
   }
 
+  test("createTable accepts EXTERNAL=TRUE parameter (mirrors HMSDDLExecutor 
behavior)") {
+    withTempDir { tmp =>
+      val client = newClient()
+      val databaseName = generateName("db")
+      val tableName = generateName("tbl")
+
+      client.createDatabase(new Database(databaseName, "test database", new 
File(tmp, databaseName).toURI.toString, new util.HashMap[String, String]()))
+
+      // Hudi's HMSDDLExecutor.createTable sets BOTH 
`tableType=EXTERNAL_TABLE` and
+      // `parameters[EXTERNAL]=TRUE` on the Hive Table object. Spark's
+      // HiveExternalCatalog.verifyTableProperties rejects "EXTERNAL" as a 
property key
+      // unless we strip it in toCatalogTable. This test mirrors that 
real-world shape.
+      val createdTable = newTable(
+        databaseName,
+        tableName,
+        new File(tmp, tableName).toURI.toString,
+        Seq("id" -> "int", "name" -> "string"),
+        Seq("dt" -> "string"),
+        Map("EXTERNAL" -> "TRUE", "comment" -> "v1"))
+
+      client.createTable(createdTable)
+      assertTrue(client.tableExists(databaseName, tableName))
+      assertEquals("v1", client.getTable(databaseName, 
tableName).getParameters.get("comment"))
+    }
+  }
+
   private def newClient(): SparkCatalogMetaStoreClient = {
     SparkSession.setActiveSession(spark)
     SparkSession.setDefaultSession(spark)

Reply via email to