This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new f5b1b8306cf [SPARK-45562][SQL] XML: Add SQL error class for missing rowTag option f5b1b8306cf is described below commit f5b1b8306cf13218f5ff79944aaa9c0b4e74fda4 Author: Sandip Agarwala <131817656+sandip...@users.noreply.github.com> AuthorDate: Fri Nov 10 17:44:39 2023 +0900 [SPARK-45562][SQL] XML: Add SQL error class for missing rowTag option ### What changes were proposed in this pull request? rowTag option is required for reading XML files. This PR adds a SQL error class for missing rowTag option. ### Why are the changes needed? rowTag option is required for reading XML files. This PR adds a SQL error class for missing rowTag option. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Updated the unit test to check for error message. ### Was this patch authored or co-authored using generative AI tooling? No Closes #43710 from sandip-db/xml-rowTagRequiredError. Authored-by: Sandip Agarwala <131817656+sandip...@users.noreply.github.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- common/utils/src/main/resources/error/error-classes.json | 6 ++++++ docs/sql-error-conditions.md | 6 ++++++ .../scala/org/apache/spark/sql/catalyst/xml/XmlOptions.scala | 8 ++++++-- .../org/apache/spark/sql/errors/QueryCompilationErrors.scala | 7 +++++++ .../apache/spark/sql/execution/datasources/xml/XmlSuite.scala | 11 ++++++++--- 5 files changed, 33 insertions(+), 5 deletions(-) diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json index 26f6c0240af..3b7a3a6006e 100644 --- a/common/utils/src/main/resources/error/error-classes.json +++ b/common/utils/src/main/resources/error/error-classes.json @@ -3911,6 +3911,12 @@ }, "sqlState" : "42605" }, + "XML_ROW_TAG_MISSING" : { + "message" : [ + "<rowTag> option is required for reading files in XML format." + ], + "sqlState" : "42000" + }, "_LEGACY_ERROR_TEMP_0001" : { "message" : [ "Invalid InsertIntoContext." diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md index 2cb433b19fa..a811019e0a5 100644 --- a/docs/sql-error-conditions.md +++ b/docs/sql-error-conditions.md @@ -2369,3 +2369,9 @@ The operation `<operation>` requires a `<requiredType>`. But `<objectName>` is a The `<functionName>` requires `<expectedNum>` parameters but the actual number is `<actualNum>`. For more details see [WRONG_NUM_ARGS](sql-error-conditions-wrong-num-args-error-class.html) + +### XML_ROW_TAG_MISSING + +[SQLSTATE: 42000](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) + +`<rowTag>` option is required for reading files in XML format. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlOptions.scala index aac6eec21c6..8f6cdbf360e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlOptions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlOptions.scala @@ -24,7 +24,7 @@ import javax.xml.stream.XMLInputFactory import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.{DataSourceOptions, FileSourceOptions} import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, CompressionCodecs, DateFormatter, DateTimeUtils, ParseMode, PermissiveMode} -import org.apache.spark.sql.errors.QueryExecutionErrors +import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.internal.{LegacyBehaviorPolicy, SQLConf} /** @@ -66,7 +66,11 @@ private[sql] class XmlOptions( val compressionCodec = parameters.get(COMPRESSION).map(CompressionCodecs.getCodecClassName) val rowTagOpt = parameters.get(XmlOptions.ROW_TAG).map(_.trim) - require(!rowTagRequired || rowTagOpt.isDefined, s"'${XmlOptions.ROW_TAG}' option is required.") + + if (rowTagRequired && rowTagOpt.isEmpty) { + throw QueryCompilationErrors.xmlRowTagRequiredError(XmlOptions.ROW_TAG) + } + val rowTag = rowTagOpt.getOrElse(XmlOptions.DEFAULT_ROW_TAG) require(rowTag.nonEmpty, s"'$ROW_TAG' option should not be an empty string.") require(!rowTag.startsWith("<") && !rowTag.endsWith(">"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 0c5dcb1ead0..e772b3497ac 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -3817,4 +3817,11 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat errorClass = "FOUND_MULTIPLE_DATA_SOURCES", messageParameters = Map("provider" -> provider)) } + + def xmlRowTagRequiredError(optionName: String): Throwable = { + new AnalysisException( + errorClass = "XML_ROW_TAG_MISSING", + messageParameters = Map("rowTag" -> toSQLId(optionName)) + ) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala index 2d4cd2f403c..21122676c46 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala @@ -36,6 +36,7 @@ import org.apache.spark.sql.{AnalysisException, Dataset, Encoders, QueryTest, Ro import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.catalyst.xml.XmlOptions import org.apache.spark.sql.catalyst.xml.XmlOptions._ +import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.datasources.xml.TestUtils._ import org.apache.spark.sql.functions._ import org.apache.spark.sql.test.SharedSparkSession @@ -1782,17 +1783,21 @@ class XmlSuite extends QueryTest with SharedSparkSession { test("Test XML Options Error Messages") { def checkXmlOptionErrorMessage( parameters: Map[String, String] = Map.empty, - msg: String): Unit = { - val e = intercept[IllegalArgumentException] { + msg: String, + exception: Throwable = new IllegalArgumentException().getCause): Unit = { + val e = intercept[Exception] { spark.read .options(parameters) .xml(getTestResourcePath(resDir + "ages.xml")) .collect() } + assert(e.getCause === exception) assert(e.getMessage.contains(msg)) } - checkXmlOptionErrorMessage(Map.empty, "'rowTag' option is required.") + checkXmlOptionErrorMessage(Map.empty, + "[XML_ROW_TAG_MISSING] `rowTag` option is required for reading files in XML format.", + QueryCompilationErrors.xmlRowTagRequiredError(XmlOptions.ROW_TAG).getCause) checkXmlOptionErrorMessage(Map("rowTag" -> ""), "'rowTag' option should not be an empty string.") checkXmlOptionErrorMessage(Map("rowTag" -> " "), --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org