This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 1e03db36a93 [SPARK-44911][SQL] Create hive table with invalid column should return error class 1e03db36a93 is described below commit 1e03db36a939aea5b4d55059967ccde96cb29564 Author: zzzzming95 <505306...@qq.com> AuthorDate: Tue Sep 12 11:55:08 2023 +0300 [SPARK-44911][SQL] Create hive table with invalid column should return error class ### What changes were proposed in this pull request? Creating a Hive table with an invalid column name should return an error class. Run the SQL: ``` create table test stored as parquet as select id, date'2018-01-01' + make_dt_interval(0, id) from range(0, 10) ``` Before this change, the error was: ``` org.apache.spark.sql.AnalysisException: Cannot create a table having a column whose name contains commas in Hive metastore. Table: `spark_catalog`.`default`.`test`; Column: DATE '2018-01-01' + make_dt_interval(0, id, 0, 0.000000) at org.apache.spark.sql.hive.HiveExternalCatalog.$anonfun$verifyDataSchema$4(HiveExternalCatalog.scala:175) at org.apache.spark.sql.hive.HiveExternalCatalog.$anonfun$verifyDataSchema$4$adapted(HiveExternalCatalog.scala:171) at scala.collection.Iterator.foreach(Iterator.scala:943) ``` After this change: ``` Exception in thread "main" org.apache.spark.sql.AnalysisException: [INVALID_HIVE_COLUMN_NAME] Cannot create the table `spark_catalog`.`default`.`parquet_ds1` having the column `DATE '2018-01-01' + make_dt_interval(0, id, 0, 0`.`000000)` whose name contains invalid characters ',' in Hive metastore. at org.apache.spark.sql.hive.HiveExternalCatalog.$anonfun$verifyDataSchema$4(HiveExternalCatalog.scala:180) at org.apache.spark.sql.hive.HiveExternalCatalog.$anonfun$verifyDataSchema$4$adapted(HiveExternalCatalog.scala:171) at scala.collection.Iterator.foreach(Iterator.scala:943) ``` ### Why are the changes needed? As above. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? 
add UT ### Was this patch authored or co-authored using generative AI tooling? no Closes #42609 from zzzzming95/SPARK-44911. Authored-by: zzzzming95 <505306...@qq.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../src/main/resources/error/error-classes.json | 2 +- docs/sql-error-conditions.md | 2 +- .../spark/sql/hive/HiveExternalCatalog.scala | 11 ++++++++--- .../spark/sql/hive/execution/HiveDDLSuite.scala | 21 ++++++++++++++++++++ .../spark/sql/hive/execution/SQLQuerySuite.scala | 23 +++++++++++++++------- 5 files changed, 47 insertions(+), 12 deletions(-) diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json index 415bdbaf42a..4740ed72f89 100644 --- a/common/utils/src/main/resources/error/error-classes.json +++ b/common/utils/src/main/resources/error/error-classes.json @@ -1587,7 +1587,7 @@ }, "INVALID_HIVE_COLUMN_NAME" : { "message" : [ - "Cannot create the table <tableName> having the nested column <columnName> whose name contains invalid characters <invalidChars> in Hive metastore." + "Cannot create the table <tableName> having the column <columnName> whose name contains invalid characters <invalidChars> in Hive metastore." ] }, "INVALID_IDENTIFIER" : { diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md index 0d54938593c..444c2b7c0d1 100644 --- a/docs/sql-error-conditions.md +++ b/docs/sql-error-conditions.md @@ -971,7 +971,7 @@ For more details see [INVALID_HANDLE](sql-error-conditions-invalid-handle-error- SQLSTATE: none assigned -Cannot create the table `<tableName>` having the nested column `<columnName>` whose name contains invalid characters `<invalidChars>` in Hive metastore. +Cannot create the table `<tableName>` having the column `<columnName>` whose name contains invalid characters `<invalidChars>` in Hive metastore. 
### INVALID_IDENTIFIER diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index e4325989b70..67292460bbc 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -42,7 +42,7 @@ import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.types.DataTypeUtils import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, CharVarcharUtils} -import org.apache.spark.sql.catalyst.util.TypeUtils.toSQLId +import org.apache.spark.sql.catalyst.util.TypeUtils.{toSQLId, toSQLValue} import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.{PartitioningUtils, SourceOptions} import org.apache.spark.sql.hive.client.HiveClient @@ -172,8 +172,13 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat f.dataType match { // Checks top-level column names case _ if f.name.contains(",") => - throw new AnalysisException("Cannot create a table having a column whose name " + - s"contains commas in Hive metastore. 
Table: $tableName; Column: ${f.name}") + throw new AnalysisException( + errorClass = "INVALID_HIVE_COLUMN_NAME", + messageParameters = Map( + "invalidChars" -> toSQLValue(","), + "tableName" -> toSQLId(tableName.nameParts), + "columnName" -> toSQLId(f.name) + )) // Checks nested column names case st: StructType => verifyNestedColumnNames(st) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 201ba5ea6a1..7a4a339e937 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -3374,4 +3374,25 @@ class HiveDDLSuite ) } } + + test("SPARK-44911: Create the table with invalid column") { + val tbl = "t1" + withTable(tbl) { + val e = intercept[AnalysisException] { + sql( + s""" + |CREATE TABLE t1 + |STORED AS parquet + |SELECT id, DATE'2018-01-01' + MAKE_DT_INTERVAL(0, id) FROM RANGE(0, 10) + """.stripMargin) + } + checkError(e, + errorClass = "INVALID_HIVE_COLUMN_NAME", + parameters = Map( + "invalidChars" -> "','", + "tableName" -> "`spark_catalog`.`default`.`t1`", + "columnName" -> "`DATE '2018-01-01' + make_dt_interval(0, id, 0, 0`.`000000)`") + ) + } + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 9308d1eda14..0ede331d99a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -2143,16 +2143,19 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi test("Auto alias construction of get_json_object") { val df = Seq(("1", """{"f1": "value1", "f5": 5.23}""")).toDF("key", "jstring") - val expectedMsg = "Cannot create a table having a column 
whose name contains commas " + - s"in Hive metastore. Table: `$SESSION_CATALOG_NAME`.`default`.`t`; Column: " + - "get_json_object(jstring, $.f1)" withTable("t") { val e = intercept[AnalysisException] { df.select($"key", functions.get_json_object($"jstring", "$.f1")) .write.format("hive").saveAsTable("t") - }.getMessage - assert(e.contains(expectedMsg)) + } + checkError(e, + errorClass = "INVALID_HIVE_COLUMN_NAME", + parameters = Map( + "invalidChars" -> "','", + "tableName" -> "`spark_catalog`.`default`.`t`", + "columnName" -> "`get_json_object(jstring, $`.`f1)`") + ) } withTempView("tempView") { @@ -2161,8 +2164,14 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi val e = intercept[AnalysisException] { sql("CREATE TABLE t USING hive AS " + "SELECT key, get_json_object(jstring, '$.f1') FROM tempView") - }.getMessage - assert(e.contains(expectedMsg)) + } + checkError(e, + errorClass = "INVALID_HIVE_COLUMN_NAME", + parameters = Map( + "invalidChars" -> "','", + "tableName" -> "`spark_catalog`.`default`.`t`", + "columnName" -> "`get_json_object(jstring, $`.`f1)`") + ) } } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org