This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new e04ac56e645f [SPARK-45225][SQL][FOLLOW-UP] XML: Fix nested XSD file 
path resolution
e04ac56e645f is described below

commit e04ac56e645f1c0ed5f5134686ddebdbae524d12
Author: Sandip Agarwala <131817656+sandip...@users.noreply.github.com>
AuthorDate: Fri Apr 26 17:21:32 2024 +0900

    [SPARK-45225][SQL][FOLLOW-UP] XML: Fix nested XSD file path resolution
    
    ### What changes were proposed in this pull request?
    This PR fixes path resolution for nested XSD files when the top-level XSD 
is provided through the `rowValidationXSDPath` option or the `XSDToSchema` API.
    
    ### Why are the changes needed?
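    Nested XSD files were not resolved correctly: `ValidatorUtil` built the 
schema from a stream without a system ID, and `XSDToSchema` used the parent 
directory rather than the XSD file path as the base URI.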
    
    ### Does this PR introduce _any_ user-facing change?
    Yes
    
    ### How was this patch tested?
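    Extended the existing `test XSD validation` test in `XmlSuite` to also 
validate against `include-example/first.xsd`.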
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No
    
    Closes #46235 from sandip-db/xml_nested_xsd.
    
    Authored-by: Sandip Agarwala <131817656+sandip...@users.noreply.github.com>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 .../apache/spark/sql/catalyst/xml/ValidatorUtil.scala  |  2 +-
 .../sql/execution/datasources/xml/XSDToSchema.scala    |  2 +-
 .../spark/sql/execution/datasources/xml/XmlSuite.scala | 18 ++++++++++--------
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/ValidatorUtil.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/ValidatorUtil.scala
index 3d93c4e8742a..a49de687a27d 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/ValidatorUtil.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/ValidatorUtil.scala
@@ -42,7 +42,7 @@ object ValidatorUtil extends Logging {
         val in = openSchemaFile(new Path(key))
         try {
           val schemaFactory = 
SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI)
-          schemaFactory.newSchema(new StreamSource(in))
+          schemaFactory.newSchema(new StreamSource(in, key))
         } finally {
           in.close()
         }
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/xml/XSDToSchema.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/xml/XSDToSchema.scala
index 87082299615c..c03c0ba11de5 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/xml/XSDToSchema.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/xml/XSDToSchema.scala
@@ -47,7 +47,7 @@ object XSDToSchema extends Logging{
   def read(xsdPath: Path): StructType = {
     val in = ValidatorUtil.openSchemaFile(xsdPath)
     val xmlSchemaCollection = new XmlSchemaCollection()
-    xmlSchemaCollection.setBaseUri(xsdPath.getParent.toString)
+    xmlSchemaCollection.setBaseUri(xsdPath.toString)
     val xmlSchema = xmlSchemaCollection.read(new InputStreamReader(in))
     getStructType(xmlSchema)
   }
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala
index 7df7c0d49d19..51e8cfc7f103 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala
@@ -1206,14 +1206,16 @@ class XmlSuite
   }
 
   test("test XSD validation") {
-    val basketDF = spark.read
-      .option("rowTag", "basket")
-      .option("inferSchema", true)
-      .option("rowValidationXSDPath", getTestResourcePath(resDir + 
"basket.xsd")
-        .replace("file:/", "/"))
-      .xml(getTestResourcePath(resDir + "basket.xml"))
-    // Mostly checking it doesn't fail
-    assert(basketDF.selectExpr("entry[0].key").head().getLong(0) === 9027)
+    Seq("basket.xsd", "include-example/first.xsd").foreach { xsdFile =>
+      val basketDF = spark.read
+        .option("rowTag", "basket")
+        .option("inferSchema", true)
+        .option("rowValidationXSDPath", getTestResourcePath(resDir + xsdFile)
+          .replace("file:/", "/"))
+        .xml(getTestResourcePath(resDir + "basket.xml"))
+      // Mostly checking it doesn't fail
+      assert(basketDF.selectExpr("entry[0].key").head().getLong(0) === 9027)
+    }
   }
 
   test("test XSD validation with validation error") {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to