This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new e04ac56e645f [SPARK-45225][SQL][FOLLOW-UP] XML: Fix nested XSD file path resolution e04ac56e645f is described below commit e04ac56e645f1c0ed5f5134686ddebdbae524d12 Author: Sandip Agarwala <131817656+sandip...@users.noreply.github.com> AuthorDate: Fri Apr 26 17:21:32 2024 +0900 [SPARK-45225][SQL][FOLLOW-UP] XML: Fix nested XSD file path resolution ### What changes were proposed in this pull request? This PR adds support for correctly resolving the paths of nested XSD files provided with the `rowValidationXSDPath` option and the `XSDToSchema` API. ### Why are the changes needed? Nested XSD files were not resolved correctly. ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? Added a new test ### Was this patch authored or co-authored using generative AI tooling? No Closes #46235 from sandip-db/xml_nested_xsd. Authored-by: Sandip Agarwala <131817656+sandip...@users.noreply.github.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- .../apache/spark/sql/catalyst/xml/ValidatorUtil.scala | 2 +- .../sql/execution/datasources/xml/XSDToSchema.scala | 2 +- .../spark/sql/execution/datasources/xml/XmlSuite.scala | 18 ++++++++++-------- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/ValidatorUtil.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/ValidatorUtil.scala index 3d93c4e8742a..a49de687a27d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/ValidatorUtil.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/ValidatorUtil.scala @@ -42,7 +42,7 @@ object ValidatorUtil extends Logging { val in = openSchemaFile(new Path(key)) try { val schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI) - schemaFactory.newSchema(new StreamSource(in)) + schemaFactory.newSchema(new StreamSource(in, key)) } finally { in.close() } diff 
--git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/xml/XSDToSchema.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/xml/XSDToSchema.scala index 87082299615c..c03c0ba11de5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/xml/XSDToSchema.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/xml/XSDToSchema.scala @@ -47,7 +47,7 @@ object XSDToSchema extends Logging{ def read(xsdPath: Path): StructType = { val in = ValidatorUtil.openSchemaFile(xsdPath) val xmlSchemaCollection = new XmlSchemaCollection() - xmlSchemaCollection.setBaseUri(xsdPath.getParent.toString) + xmlSchemaCollection.setBaseUri(xsdPath.toString) val xmlSchema = xmlSchemaCollection.read(new InputStreamReader(in)) getStructType(xmlSchema) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala index 7df7c0d49d19..51e8cfc7f103 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala @@ -1206,14 +1206,16 @@ class XmlSuite } test("test XSD validation") { - val basketDF = spark.read - .option("rowTag", "basket") - .option("inferSchema", true) - .option("rowValidationXSDPath", getTestResourcePath(resDir + "basket.xsd") - .replace("file:/", "/")) - .xml(getTestResourcePath(resDir + "basket.xml")) - // Mostly checking it doesn't fail - assert(basketDF.selectExpr("entry[0].key").head().getLong(0) === 9027) + Seq("basket.xsd", "include-example/first.xsd").foreach { xsdFile => + val basketDF = spark.read + .option("rowTag", "basket") + .option("inferSchema", true) + .option("rowValidationXSDPath", getTestResourcePath(resDir + xsdFile) + .replace("file:/", "/")) + .xml(getTestResourcePath(resDir + "basket.xml")) + // Mostly checking it doesn't fail 
+ assert(basketDF.selectExpr("entry[0].key").head().getLong(0) === 9027) + } } test("test XSD validation with validation error") { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org