This is an automated email from the ASF dual-hosted git repository.
HyukjinKwon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new e3c519e86f8a [SPARK-56859][SQL] Create user error for
IllegalArgumentException in CSVOptions
e3c519e86f8a is described below
commit e3c519e86f8aab87825f079efb1a01965e686fb5
Author: Shivadarshan Devadiga <[email protected]>
AuthorDate: Wed May 20 15:33:38 2026 -0700
[SPARK-56859][SQL] Create user error for IllegalArgumentException in
CSVOptions
### What changes were proposed in this pull request?
This PR converts IllegalArgumentException thrown by require() statements in
CSVOptions to proper user-facing Spark errors with error classes.
### Why are the changes needed?
Currently, CSVOptions uses Scala's require() which throws generic
IllegalArgumentException with plain text messages. This change aligns lineSep
validation with Spark's error handling standards, making errors more actionable
and consistent.
### Does this PR introduce _any_ user-facing change?
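Yes. The error messages for invalid lineSep values will change: they are now
raised as SparkIllegalArgumentException with the INVALID_LINE_SEPARATOR error
condition (sub-conditions EMPTY, NULL, and TOO_LONG).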
### How was this patch tested?
All SQL-related tests were run, including the lineSep tests.
### Was this patch authored or co-authored using generative AI tooling?
No
cc: markj-db MaxGekk HyukjinKwon
Closes #55877 from shivadarshan-devadiga/cveoptions-user-error.
Authored-by: Shivadarshan Devadiga <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
.../src/main/resources/error/error-conditions.json | 23 ++++++++++++++++++++++
.../apache/spark/sql/catalyst/csv/CSVOptions.scala | 14 ++++++++-----
.../spark/sql/errors/QueryExecutionErrors.scala | 16 +++++++++++++++
.../sql/execution/datasources/csv/CSVSuite.scala | 22 +++++++++++++--------
4 files changed, 62 insertions(+), 13 deletions(-)
diff --git a/common/utils/src/main/resources/error/error-conditions.json
b/common/utils/src/main/resources/error/error-conditions.json
index 81ef749e19f8..119a1d5d42b4 100644
--- a/common/utils/src/main/resources/error/error-conditions.json
+++ b/common/utils/src/main/resources/error/error-conditions.json
@@ -4136,6 +4136,29 @@
},
"sqlState" : "42K0E"
},
+ "INVALID_LINE_SEPARATOR" : {
+ "message" : [
+ "Invalid line separator configuration."
+ ],
+ "subClass" : {
+ "EMPTY" : {
+ "message" : [
+ "The 'lineSep' option cannot be an empty string."
+ ]
+ },
+ "NULL" : {
+ "message" : [
+ "The 'lineSep' option cannot be a null value."
+ ]
+ },
+ "TOO_LONG" : {
+ "message" : [
+ "The 'lineSep' option can contain at most 2 characters, but got
<length> characters."
+ ]
+ }
+ },
+ "sqlState" : "22023"
+ },
"INVALID_LOG_VERSION" : {
"message" : [
"UnsupportedLogVersion."
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala
index 9edb1603f463..4ae7aad19a9c 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala
@@ -284,11 +284,15 @@ class CSVOptions(
* A string between two consecutive JSON records.
*/
val lineSeparator: Option[String] = parameters.get(LINE_SEP).map { sep =>
- require(sep != null, "'lineSep' cannot be a null value.")
- require(sep.nonEmpty, "'lineSep' cannot be an empty string.")
- // Intentionally allow it up to 2 for Window's CRLF although multiple
- // characters have an issue with quotes. This is intentionally
undocumented.
- require(sep.length <= 2, "'lineSep' can contain only 1 character.")
+ if (sep == null) {
+ throw QueryExecutionErrors.lineSepCannotBeNullError()
+ }
+ if (sep.isEmpty) {
+ throw QueryExecutionErrors.lineSepCannotBeEmptyError()
+ }
+ if (sep.length > 2) {
+ throw QueryExecutionErrors.lineSepTooLongError(sep.length)
+ }
if (sep.length == 2) logWarning("It is not recommended to set 'lineSep' " +
"with 2 characters due to the limitation of supporting multi-char
'lineSep' within quotes.")
sep
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
index 0aa830827687..b97244c1c280 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
@@ -3349,4 +3349,20 @@ private[sql] object QueryExecutionErrors extends
QueryErrorsBase with ExecutionE
"expectedFamily" -> expectedFamily,
"actualFamily" -> actualFamily))
}
+
+ def lineSepCannotBeNullError(): SparkIllegalArgumentException = {
+ new SparkIllegalArgumentException(
+ errorClass = "INVALID_LINE_SEPARATOR.NULL")
+ }
+
+ def lineSepCannotBeEmptyError(): SparkIllegalArgumentException = {
+ new SparkIllegalArgumentException(
+ errorClass = "INVALID_LINE_SEPARATOR.EMPTY")
+ }
+
+ def lineSepTooLongError(length: Int): SparkIllegalArgumentException = {
+ new SparkIllegalArgumentException(
+ errorClass = "INVALID_LINE_SEPARATOR.TOO_LONG",
+ messageParameters = Map("length" -> length.toString))
+ }
}
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
index 24f80f4b928f..22b291677cd8 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
@@ -2484,15 +2484,21 @@ abstract class CSVSuite
// scalastyle:on nonascii
test("lineSep restrictions") {
- val errMsg1 = intercept[IllegalArgumentException] {
- spark.read.option("lineSep", "").csv(testFile(carsFile)).collect()
- }.getMessage
- assert(errMsg1.contains("'lineSep' cannot be an empty string"))
+ checkError(
+ exception = intercept[SparkIllegalArgumentException] {
+ spark.read.option("lineSep", "").csv(testFile(carsFile)).collect()
+ },
+ condition = "INVALID_LINE_SEPARATOR.EMPTY",
+ parameters = Map.empty
+ )
- val errMsg2 = intercept[IllegalArgumentException] {
- spark.read.option("lineSep", "123").csv(testFile(carsFile)).collect()
- }.getMessage
- assert(errMsg2.contains("'lineSep' can contain only 1 character"))
+ checkError(
+ exception = intercept[SparkIllegalArgumentException] {
+ spark.read.option("lineSep", "123").csv(testFile(carsFile)).collect()
+ },
+ condition = "INVALID_LINE_SEPARATOR.TOO_LONG",
+ parameters = Map("length" -> "3")
+ )
}
Seq(true, false).foreach { multiLine =>
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]