This is an automated email from the ASF dual-hosted git repository.
danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 91f341f8fa79 fix: filter EXTERNAL property in
SparkCatalogMetaStoreClient.toCatalogTable (#18672)
91f341f8fa79 is described below
commit 91f341f8fa795879ccb32784ea7f12af0feab82d
Author: Prashant Wason <[email protected]>
AuthorDate: Mon May 4 19:06:26 2026 -0700
fix: filter EXTERNAL property in SparkCatalogMetaStoreClient.toCatalogTable
(#18672)
Hudi's `HMSDDLExecutor.createTable` sets both `tableType=EXTERNAL_TABLE`
and `parameters[EXTERNAL]=TRUE` on the Hive Table object when the table
is external. When that Table flows through `SparkCatalogMetaStoreClient`
into `HiveExternalCatalog`, `verifyTableProperties` rejects the `EXTERNAL` key:
AnalysisException: Cannot set or change the preserved property key:
'EXTERNAL'
Spark uses `CatalogTableType.EXTERNAL` on the `CatalogTable` itself to
encode external-ness, and treats `EXTERNAL=...` as a duplicate (and
forbidden) encoding. We already map `tableType` correctly via
`if ("EXTERNAL_TABLE".equalsIgnoreCase(table.getTableType))`, so dropping
the property in the same filter that already strips `spark.sql.*` is safe.
This is the same family of fix as #18654, which filters `spark.sql.*` keys.
Adds a regression test mirroring the real `HMSDDLExecutor` shape:
`tableType=EXTERNAL_TABLE` AND `parameters[EXTERNAL]=TRUE`.
Co-authored-by: Claude Opus 4.7 <[email protected]>
---
.../sql/hive/SparkCatalogMetaStoreClient.scala | 9 +++++++-
.../sql/hive/TestSparkCatalogMetaStoreClient.scala | 26 ++++++++++++++++++++++
2 files changed, 34 insertions(+), 1 deletion(-)
diff --git
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hive/SparkCatalogMetaStoreClient.scala
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hive/SparkCatalogMetaStoreClient.scala
index bd1133e5c31a..9fa225eeb876 100644
---
a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hive/SparkCatalogMetaStoreClient.scala
+++
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hive/SparkCatalogMetaStoreClient.scala
@@ -311,8 +311,15 @@ class SparkCatalogMetaStoreClient(syncConfig:
HiveSyncConfig)
// table property keys may not start with 'spark.sql.'") because they are
reserved for
// Spark's internal use (provider, schema parts, create version). Spark
re-derives and
// writes these from the CatalogTable itself, so dropping them on the way
in is safe.
+ //
+ // Also strip "EXTERNAL". HMSDDLExecutor.createTable sets both
+ // `tableType=EXTERNAL_TABLE` and `parameters[EXTERNAL]=TRUE`. Spark's
+ // HiveExternalCatalog.verifyTableProperties rejects "EXTERNAL" as a
property key
+ // ("Cannot set or change the preserved property key: 'EXTERNAL'") because
Spark controls
+ // table type via CatalogTableType instead. The tableType field below
already encodes
+ // that information, so dropping the property is safe.
val tableProperties =
Option(table.getParameters).map(_.asScala.toMap).getOrElse(Map.empty)
- .filterNot { case (k, _) => k.startsWith("spark.sql.") }
+ .filterNot { case (k, _) => k.startsWith("spark.sql.") || k ==
"EXTERNAL" }
CatalogTable(
identifier = TableIdentifier(tbl, Some(db)),
diff --git
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hive/TestSparkCatalogMetaStoreClient.scala
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hive/TestSparkCatalogMetaStoreClient.scala
index e6385ee93528..93b2be295e3d 100644
---
a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hive/TestSparkCatalogMetaStoreClient.scala
+++
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hive/TestSparkCatalogMetaStoreClient.scala
@@ -177,6 +177,32 @@ class TestSparkCatalogMetaStoreClient extends FunSuite
with BeforeAndAfterAll {
}
}
+ test("createTable accepts EXTERNAL=TRUE parameter (mirrors HMSDDLExecutor
behavior)") {
+ withTempDir { tmp =>
+ val client = newClient()
+ val databaseName = generateName("db")
+ val tableName = generateName("tbl")
+
+ client.createDatabase(new Database(databaseName, "test database", new
File(tmp, databaseName).toURI.toString, new util.HashMap[String, String]()))
+
+ // Hudi's HMSDDLExecutor.createTable sets BOTH
`tableType=EXTERNAL_TABLE` and
+ // `parameters[EXTERNAL]=TRUE` on the Hive Table object. Spark's
+ // HiveExternalCatalog.verifyTableProperties rejects "EXTERNAL" as a
property key
+ // unless we strip it in toCatalogTable. This test mirrors that
real-world shape.
+ val createdTable = newTable(
+ databaseName,
+ tableName,
+ new File(tmp, tableName).toURI.toString,
+ Seq("id" -> "int", "name" -> "string"),
+ Seq("dt" -> "string"),
+ Map("EXTERNAL" -> "TRUE", "comment" -> "v1"))
+
+ client.createTable(createdTable)
+ assertTrue(client.tableExists(databaseName, tableName))
+ assertEquals("v1", client.getTable(databaseName,
tableName).getParameters.get("comment"))
+ }
+ }
+
private def newClient(): SparkCatalogMetaStoreClient = {
SparkSession.setActiveSession(spark)
SparkSession.setDefaultSession(spark)