This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 1e03db36a93 [SPARK-44911][SQL] Create hive table with invalid column should return error class 1e03db36a93 is described below commit 1e03db36a939aea5b4d55059967ccde96cb29564 Author: zzzzming95 <505306...@qq.com> AuthorDate: Tue Sep 12 11:55:08 2023 +0300 [SPARK-44911][SQL] Create hive table with invalid column should return error class ### What changes were proposed in this pull request? Creating a Hive table with an invalid column name should return an error class. Run the SQL: ``` create table test stored as parquet as select id, date'2018-01-01' + make_dt_interval(0, id) from range(0, 10) ``` Before this change, the error was: ``` org.apache.spark.sql.AnalysisException: Cannot create a table having a column whose name contains commas in Hive metastore. Table: `spark_catalog`.`default`.`test`; Column: DATE '2018-01-01' + make_dt_interval(0, id, 0, 0.000000) at org.apache.spark.sql.hive.HiveExternalCatalog.$anonfun$verifyDataSchema$4(HiveExternalCatalog.scala:175) at org.apache.spark.sql.hive.HiveExternalCatalog.$anonfun$verifyDataSchema$4$adapted(HiveExternalCatalog.scala:171) at scala.collection.Iterator.foreach(Iterator.scala:943) ``` After this change: ``` Exception in thread "main" org.apache.spark.sql.AnalysisException: [INVALID_HIVE_COLUMN_NAME] Cannot create the table `spark_catalog`.`default`.`parquet_ds1` having the column `DATE '2018-01-01' + make_dt_interval(0, id, 0, 0`.`000000)` whose name contains invalid characters ',' in Hive metastore. at org.apache.spark.sql.hive.HiveExternalCatalog.$anonfun$verifyDataSchema$4(HiveExternalCatalog.scala:180) at org.apache.spark.sql.hive.HiveExternalCatalog.$anonfun$verifyDataSchema$4$adapted(HiveExternalCatalog.scala:171) at scala.collection.Iterator.foreach(Iterator.scala:943) ``` ### Why are the changes needed? As above. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? 
add UT ### Was this patch authored or co-authored using generative AI tooling? no Closes #42609 from zzzzming95/SPARK-44911. Authored-by: zzzzming95 <505306...@qq.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../src/main/resources/error/error-classes.json | 2 +- docs/sql-error-conditions.md | 2 +- .../spark/sql/hive/HiveExternalCatalog.scala | 11 ++++++++--- .../spark/sql/hive/execution/HiveDDLSuite.scala | 21 ++++++++++++++++++++ .../spark/sql/hive/execution/SQLQuerySuite.scala | 23 +++++++++++++++------- 5 files changed, 47 insertions(+), 12 deletions(-) diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json index 415bdbaf42a..4740ed72f89 100644 --- a/common/utils/src/main/resources/error/error-classes.json +++ b/common/utils/src/main/resources/error/error-classes.json @@ -1587,7 +1587,7 @@ }, "INVALID_HIVE_COLUMN_NAME" : { "message" : [ - "Cannot create the table <tableName> having the nested column <columnName> whose name contains invalid characters <invalidChars> in Hive metastore." + "Cannot create the table <tableName> having the column <columnName> whose name contains invalid characters <invalidChars> in Hive metastore." ] }, "INVALID_IDENTIFIER" : { diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md index 0d54938593c..444c2b7c0d1 100644 --- a/docs/sql-error-conditions.md +++ b/docs/sql-error-conditions.md @@ -971,7 +971,7 @@ For more details see [INVALID_HANDLE](sql-error-conditions-invalid-handle-error- SQLSTATE: none assigned -Cannot create the table `<tableName>` having the nested column `<columnName>` whose name contains invalid characters `<invalidChars>` in Hive metastore. +Cannot create the table `<tableName>` having the column `<columnName>` whose name contains invalid characters `<invalidChars>` in Hive metastore. 
### INVALID_IDENTIFIER diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index e4325989b70..67292460bbc 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -42,7 +42,7 @@ import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.types.DataTypeUtils import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, CharVarcharUtils} -import org.apache.spark.sql.catalyst.util.TypeUtils.toSQLId +import org.apache.spark.sql.catalyst.util.TypeUtils.{toSQLId, toSQLValue} import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.{PartitioningUtils, SourceOptions} import org.apache.spark.sql.hive.client.HiveClient @@ -172,8 +172,13 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat f.dataType match { // Checks top-level column names case _ if f.name.contains(",") => - throw new AnalysisException("Cannot create a table having a column whose name " + - s"contains commas in Hive metastore. 
Table: $tableName; Column: ${f.name}") + throw new AnalysisException( + errorClass = "INVALID_HIVE_COLUMN_NAME", + messageParameters = Map( + "invalidChars" -> toSQLValue(","), + "tableName" -> toSQLId(tableName.nameParts), + "columnName" -> toSQLId(f.name) + )) // Checks nested column names case st: StructType => verifyNestedColumnNames(st) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 201ba5ea6a1..7a4a339e937 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -3374,4 +3374,25 @@ class HiveDDLSuite ) } } + + test("SPARK-44911: Create the table with invalid column") { + val tbl = "t1" + withTable(tbl) { + val e = intercept[AnalysisException] { + sql( + s""" + |CREATE TABLE t1 + |STORED AS parquet + |SELECT id, DATE'2018-01-01' + MAKE_DT_INTERVAL(0, id) FROM RANGE(0, 10) + """.stripMargin) + } + checkError(e, + errorClass = "INVALID_HIVE_COLUMN_NAME", + parameters = Map( + "invalidChars" -> "','", + "tableName" -> "`spark_catalog`.`default`.`t1`", + "columnName" -> "`DATE '2018-01-01' + make_dt_interval(0, id, 0, 0`.`000000)`") + ) + } + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 9308d1eda14..0ede331d99a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -2143,16 +2143,19 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi test("Auto alias construction of get_json_object") { val df = Seq(("1", """{"f1": "value1", "f5": 5.23}""")).toDF("key", "jstring") - val expectedMsg = "Cannot create a table having a column 
whose name contains commas " + - s"in Hive metastore. Table: `$SESSION_CATALOG_NAME`.`default`.`t`; Column: " + - "get_json_object(jstring, $.f1)" withTable("t") { val e = intercept[AnalysisException] { df.select($"key", functions.get_json_object($"jstring", "$.f1")) .write.format("hive").saveAsTable("t") - }.getMessage - assert(e.contains(expectedMsg)) + } + checkError(e, + errorClass = "INVALID_HIVE_COLUMN_NAME", + parameters = Map( + "invalidChars" -> "','", + "tableName" -> "`spark_catalog`.`default`.`t`", + "columnName" -> "`get_json_object(jstring, $`.`f1)`") + ) } withTempView("tempView") { @@ -2161,8 +2164,14 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi val e = intercept[AnalysisException] { sql("CREATE TABLE t USING hive AS " + "SELECT key, get_json_object(jstring, '$.f1') FROM tempView") - }.getMessage - assert(e.contains(expectedMsg)) + } + checkError(e, + errorClass = "INVALID_HIVE_COLUMN_NAME", + parameters = Map( + "invalidChars" -> "','", + "tableName" -> "`spark_catalog`.`default`.`t`", + "columnName" -> "`get_json_object(jstring, $`.`f1)`") + ) } } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org