This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 747437c80aa8 [SPARK-48476][SQL] fix NPE error message for null delmiter csv 747437c80aa8 is described below commit 747437c80aa875844f41ac61a419443af9f3b4b2 Author: milastdbx <milan.stefano...@databricks.com> AuthorDate: Fri May 31 09:10:38 2024 -0700 [SPARK-48476][SQL] fix NPE error message for null delmiter csv ### What changes were proposed in this pull request? In this pull request I propose that we throw a proper error class when a customer specifies null as a delimiter for CSV. Currently we throw an NPE. ### Why are the changes needed? To make Spark more user-friendly. ### Does this PR introduce _any_ user-facing change? Yes, customers will now get the INVALID_DELIMITER_VALUE.NULL_VALUE error class when they specify null as the delimiter for CSV. ### How was this patch tested? Unit test. ### Was this patch authored or co-authored using generative AI tooling? No Closes #46810 from milastdbx/dev/milast/fixNPEForDelimiterCSV. Authored-by: milastdbx <milan.stefano...@databricks.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- common/utils/src/main/resources/error/error-conditions.json | 5 +++++ .../scala/org/apache/spark/sql/catalyst/csv/CSVExprUtils.scala | 5 +++++ .../org/apache/spark/sql/catalyst/csv/CSVExprUtilsSuite.scala | 9 +++++++++ 3 files changed, 19 insertions(+) diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index 3914c0f177dc..3dd7a6d65d7f 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -2021,6 +2021,11 @@ "Delimiter cannot be empty string." ] }, + "NULL_VALUE" : { + "message" : [ + "Delimiter cannot be null." + ] + }, "SINGLE_BACKSLASH" : { "message" : [ "Single backslash is prohibited. It has special meaning as beginning of an escape sequence. 
To get the backslash character, pass a string with two backslashes as the delimiter." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVExprUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVExprUtils.scala index 62638d70dd90..7b6664a4117a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVExprUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVExprUtils.scala @@ -120,6 +120,11 @@ object CSVExprUtils { * @throws SparkIllegalArgumentException if any of the individual input chunks are illegal */ def toDelimiterStr(str: String): String = { + if (str == null) { + throw new SparkIllegalArgumentException( + errorClass = "INVALID_DELIMITER_VALUE.NULL_VALUE") + } + var idx = 0 var delimiter = "" diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVExprUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVExprUtilsSuite.scala index 2e94c723a6f2..d4b68500e078 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVExprUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVExprUtilsSuite.scala @@ -33,6 +33,15 @@ class CSVExprUtilsSuite extends SparkFunSuite { assert(CSVExprUtils.toChar("""\\""") === '\\') } + test("Does not accept null delimiter") { + checkError( + exception = intercept[SparkIllegalArgumentException]{ + CSVExprUtils.toDelimiterStr(null) + }, + errorClass = "INVALID_DELIMITER_VALUE.NULL_VALUE", + parameters = Map.empty) + } + test("Does not accept delimiter larger than one character") { checkError( exception = intercept[SparkIllegalArgumentException]{ --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org