This is an automated email from the ASF dual-hosted git repository.

slawrence pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git


The following commit(s) were added to refs/heads/main by this push:
     new 2f5e44777 Fix error handling when unparsing invalid infosets
2f5e44777 is described below

commit 2f5e44777b063d2789399a2f258cf0d35bc18e08
Author: Steve Lawrence <[email protected]>
AuthorDate: Wed Aug 6 12:44:59 2025 -0400

    Fix error handling when unparsing invalid infosets
    
    When the XMLTextInfosetInputter hits an error, it uses an
    XMLEventAllocator to create an event that we can get the current
    location from. However, if trailing data exists after the infoset then
    creating this event will fail with a null pointer exception. This does
    not appear to be a bug in Woodstox, but is really just incorrect usage
    of the API.
    
    Fortunately, the XMLStreamReader already provides a getLocation function
    to get the current parse location. This is the preferred mechanism for
    this, so all instances of the allocator are replaced with the
    getLocation function. This also improves the error messages since
    getLocation provides column information in addition to line numbers,
    which could be useful for non-pretty-printed infosets that are on a
    single line.
    
    In cases where the error comes from an XMLStreamException, we don't need
    to get location information at all since Woodstox already includes it in
    the exception message.
    
    DAFFODIL-3003
---
 .../daffodil/cli/cliTest/TestCLIUnparsing.scala    | 12 ++++++
 .../runtime1/infoset/XMLTextInfosetInputter.scala  | 45 +++++-----------------
 2 files changed, 22 insertions(+), 35 deletions(-)

diff --git 
a/daffodil-cli/src/test/scala/org/apache/daffodil/cli/cliTest/TestCLIUnparsing.scala
 
b/daffodil-cli/src/test/scala/org/apache/daffodil/cli/cliTest/TestCLIUnparsing.scala
index 7ec5378e0..719a87f51 100644
--- 
a/daffodil-cli/src/test/scala/org/apache/daffodil/cli/cliTest/TestCLIUnparsing.scala
+++ 
b/daffodil-cli/src/test/scala/org/apache/daffodil/cli/cliTest/TestCLIUnparsing.scala
@@ -467,4 +467,16 @@ class TestCLIUnparsing {
     }(ExitCode.Success)
   }
 
+  @Test def test_CLI_Unparsing_extra_data(): Unit = {
+    val schema = path(
+      
"daffodil-test/src/test/resources/org/apache/daffodil/section00/general/generalSchema.dfdl.xsd"
+    )
+
+    runCLI(args"unparse -s $schema --root e1") { cli =>
+      cli.send("<e1 xmlns='http://example.com'>Hello</e1>extra", inputDone = 
true)
+      cli.expectErr("Unexpected character 'e'")
+      cli.expectErr("[row,col {unknown-source}]: [1,42]")
+    }(ExitCode.UnparseError)
+  }
+
 }
diff --git 
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/XMLTextInfosetInputter.scala
 
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/XMLTextInfosetInputter.scala
index 18640b4fa..1da9b0bc8 100644
--- 
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/XMLTextInfosetInputter.scala
+++ 
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/XMLTextInfosetInputter.scala
@@ -26,16 +26,16 @@ import javax.xml.stream.XMLStreamConstants.*
 import javax.xml.stream.XMLStreamException
 import javax.xml.stream.XMLStreamReader
 import javax.xml.stream.XMLStreamWriter
-import javax.xml.stream.util.XMLEventAllocator
 
 import org.apache.daffodil.api
 import org.apache.daffodil.api.infoset.Infoset.InfosetInputterEventType
 import org.apache.daffodil.api.infoset.Infoset.InfosetInputterEventType.*
 import org.apache.daffodil.lib.exceptions.Assert
-import org.apache.daffodil.lib.util.Misc
 import org.apache.daffodil.lib.xml.XMLUtils
 import org.apache.daffodil.runtime1.dpath.NodeInfo
 
+import com.ctc.wstx.cfg.ErrorConsts;
+
 object XMLTextInfoset {
   lazy val xmlInputFactory = {
     val fact = new com.ctc.wstx.stax.WstxInputFactory()
@@ -189,25 +189,14 @@ object XMLTextInfoset {
 
 class XMLTextInfosetInputter(input: java.io.InputStream) extends 
api.infoset.InfosetInputter {
 
-  /**
-   * evAlloc is only to be used for diagnostic messages. It lets us easily
-   * capture and toString the event information. But we don't call it otherwise
-   * as it allocates an object, and we're trying to avoid that.
-   */
-  private lazy val (xsr: XMLStreamReader, evAlloc: XMLEventAllocator) = {
+  private lazy val xsr: XMLStreamReader = {
     val xsr = XMLTextInfoset.xmlInputFactory.createXMLStreamReader(input)
 
-    //
-    // This gets the event allocator corresponding to the xmlStreamReader just 
created.
-    // Strange API. They should let you get this from the xmlStreamReader 
itself.
-    //
-    val evAlloc = XMLTextInfoset.xmlInputFactory.getEventAllocator
-
     // no need for UnparseError here. If the XML syntax is bad, parser catches 
it before we get here.
     Assert.invariant(xsr.hasNext())
     val evNum = xsr.getEventType()
     Assert.invariant(evNum == START_DOCUMENT)
-    (xsr, evAlloc)
+    xsr
   }
 
   /**
@@ -307,10 +296,7 @@ class XMLTextInfosetInputter(input: java.io.InputStream) 
extends api.infoset.Inf
           gatherXmlAsString()
         } catch {
           case xse: XMLStreamException => {
-            val lineNum = evAlloc.allocate(xsr).getLocation.getLineNumber
-            throw new InvalidInfosetException(
-              "Error on line " + lineNum + ": " + xse.getMessage
-            )
+            throw new InvalidInfosetException(xse.getMessage())
           }
         }
       } else {
@@ -319,9 +305,7 @@ class XMLTextInfosetInputter(input: java.io.InputStream) 
extends api.infoset.Inf
             xsr.getElementText()
           } catch {
             case xse: XMLStreamException => {
-              throw new NonTextFoundInSimpleContentException(
-                "Error on line " + 
evAlloc.allocate(xsr).getLocation.getLineNumber
-              )
+              throw new NonTextFoundInSimpleContentException(xse.getMessage)
             }
           }
         if (primType == NodeInfo.String) {
@@ -354,10 +338,7 @@ class XMLTextInfosetInputter(input: java.io.InputStream) 
extends api.infoset.Inf
         false
       } else {
         throw new InvalidInfosetException(
-          "xsi:nil property is not a valid boolean: '" + nilAttrValue + "' on 
line " + evAlloc
-            .allocate(xsr)
-            .getLocation
-            .getLineNumber
+          s"xsi:nil property is not a valid boolean: '$nilAttrValue' at 
${xsr.getLocation}"
         )
       }
     res
@@ -403,10 +384,7 @@ class XMLTextInfosetInputter(input: java.io.InputStream) 
extends api.infoset.Inf
           xsr.next()
         } catch {
           case xse: XMLStreamException => {
-            val details = "Error: " + Misc
-              .getSomeMessage(xse)
-              .get + " on line " + 
evAlloc.allocate(xsr).getLocation.getLineNumber
-            throw new IllegalContentWhereEventExpected(details)
+            throw new IllegalContentWhereEventExpected(xse.getMessage)
           }
         }
       result match {
@@ -416,14 +394,11 @@ class XMLTextInfosetInputter(input: java.io.InputStream) 
extends api.infoset.Inf
         case SPACE | PROCESSING_INSTRUCTION | COMMENT => // skip these too
         case DTD =>
           throw new IllegalContentWhereEventExpected(
-            "DOCTYPE/DTD Not supported. Error on line " + evAlloc
-              .allocate(xsr)
-              .getLocation
-              .getLineNumber
+            s"DOCTYPE/DTD Not supported. Error at ${xsr.getLocation}"
           )
         case other =>
           throw new IllegalContentWhereEventExpected(
-            "Error on line " + evAlloc.allocate(xsr).getLocation.getLineNumber 
+ " : " + other
+            s"Unexpected token: ${ErrorConsts.tokenTypeDesc(other)}. Error at 
${xsr.getLocation}"
           )
       }
     }

Reply via email to