This is an automated email from the ASF dual-hosted git repository.
slawrence pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git
The following commit(s) were added to refs/heads/main by this push:
new 2f5e44777 Fix error handling when unparsing invalid infosets
2f5e44777 is described below
commit 2f5e44777b063d2789399a2f258cf0d35bc18e08
Author: Steve Lawrence <[email protected]>
AuthorDate: Wed Aug 6 12:44:59 2025 -0400
Fix error handling when unparsing invalid infosets
When the XMLTextInfosetInputter hits an error, it uses an
XMLEventAllocator to create an event that we can get the current
location from. However, if trailing data exists after the infoset, then
creating this event fails with a NullPointerException. This does
not appear to be a bug in Woodstox, but rather incorrect usage
of the API.
Fortunately, the XMLStreamReader already provides a getLocation function
to get the current parse location. This is the preferred mechanism for
this, so all instances of the allocator are replaced with the
getLocation function. This also improves the error messages since
getLocation provides column information in addition to line numbers,
which could be useful for non-pretty printed infosets that are on a
single line.
In cases where the error comes from an XMLStreamException, we don't need
to get location information at all since Woodstox already includes it in
the exception message.
DAFFODIL-3003
---
.../daffodil/cli/cliTest/TestCLIUnparsing.scala | 12 ++++++
.../runtime1/infoset/XMLTextInfosetInputter.scala | 45 +++++-----------------
2 files changed, 22 insertions(+), 35 deletions(-)
diff --git
a/daffodil-cli/src/test/scala/org/apache/daffodil/cli/cliTest/TestCLIUnparsing.scala
b/daffodil-cli/src/test/scala/org/apache/daffodil/cli/cliTest/TestCLIUnparsing.scala
index 7ec5378e0..719a87f51 100644
---
a/daffodil-cli/src/test/scala/org/apache/daffodil/cli/cliTest/TestCLIUnparsing.scala
+++
b/daffodil-cli/src/test/scala/org/apache/daffodil/cli/cliTest/TestCLIUnparsing.scala
@@ -467,4 +467,16 @@ class TestCLIUnparsing {
}(ExitCode.Success)
}
+ @Test def test_CLI_Unparsing_extra_data(): Unit = {
+ val schema = path(
+
"daffodil-test/src/test/resources/org/apache/daffodil/section00/general/generalSchema.dfdl.xsd"
+ )
+
+ runCLI(args"unparse -s $schema --root e1") { cli =>
+ cli.send("<e1 xmlns='http://example.com'>Hello</e1>extra", inputDone =
true)
+ cli.expectErr("Unexpected character 'e'")
+ cli.expectErr("[row,col {unknown-source}]: [1,42]")
+ }(ExitCode.UnparseError)
+ }
+
}
diff --git
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/XMLTextInfosetInputter.scala
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/XMLTextInfosetInputter.scala
index 18640b4fa..1da9b0bc8 100644
---
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/XMLTextInfosetInputter.scala
+++
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/infoset/XMLTextInfosetInputter.scala
@@ -26,16 +26,16 @@ import javax.xml.stream.XMLStreamConstants.*
import javax.xml.stream.XMLStreamException
import javax.xml.stream.XMLStreamReader
import javax.xml.stream.XMLStreamWriter
-import javax.xml.stream.util.XMLEventAllocator
import org.apache.daffodil.api
import org.apache.daffodil.api.infoset.Infoset.InfosetInputterEventType
import org.apache.daffodil.api.infoset.Infoset.InfosetInputterEventType.*
import org.apache.daffodil.lib.exceptions.Assert
-import org.apache.daffodil.lib.util.Misc
import org.apache.daffodil.lib.xml.XMLUtils
import org.apache.daffodil.runtime1.dpath.NodeInfo
+import com.ctc.wstx.cfg.ErrorConsts;
+
object XMLTextInfoset {
lazy val xmlInputFactory = {
val fact = new com.ctc.wstx.stax.WstxInputFactory()
@@ -189,25 +189,14 @@ object XMLTextInfoset {
class XMLTextInfosetInputter(input: java.io.InputStream) extends
api.infoset.InfosetInputter {
- /**
- * evAlloc is only to be used for diagnostic messages. It lets us easily
- * capture and toString the event information. But we don't call it otherwise
- * as it allocates an object, and we're trying to avoid that.
- */
- private lazy val (xsr: XMLStreamReader, evAlloc: XMLEventAllocator) = {
+ private lazy val xsr: XMLStreamReader = {
val xsr = XMLTextInfoset.xmlInputFactory.createXMLStreamReader(input)
- //
- // This gets the event allocator corresponding to the xmlStreamReader just
created.
- // Strange API. They should let you get this from the xmlStreamReader
itself.
- //
- val evAlloc = XMLTextInfoset.xmlInputFactory.getEventAllocator
-
// no need for UnparseError here. If the XML syntax is bad, parser catches
it before we get here.
Assert.invariant(xsr.hasNext())
val evNum = xsr.getEventType()
Assert.invariant(evNum == START_DOCUMENT)
- (xsr, evAlloc)
+ xsr
}
/**
@@ -307,10 +296,7 @@ class XMLTextInfosetInputter(input: java.io.InputStream)
extends api.infoset.Inf
gatherXmlAsString()
} catch {
case xse: XMLStreamException => {
- val lineNum = evAlloc.allocate(xsr).getLocation.getLineNumber
- throw new InvalidInfosetException(
- "Error on line " + lineNum + ": " + xse.getMessage
- )
+ throw new InvalidInfosetException(xse.getMessage())
}
}
} else {
@@ -319,9 +305,7 @@ class XMLTextInfosetInputter(input: java.io.InputStream)
extends api.infoset.Inf
xsr.getElementText()
} catch {
case xse: XMLStreamException => {
- throw new NonTextFoundInSimpleContentException(
- "Error on line " +
evAlloc.allocate(xsr).getLocation.getLineNumber
- )
+ throw new NonTextFoundInSimpleContentException(xse.getMessage)
}
}
if (primType == NodeInfo.String) {
@@ -354,10 +338,7 @@ class XMLTextInfosetInputter(input: java.io.InputStream)
extends api.infoset.Inf
false
} else {
throw new InvalidInfosetException(
- "xsi:nil property is not a valid boolean: '" + nilAttrValue + "' on
line " + evAlloc
- .allocate(xsr)
- .getLocation
- .getLineNumber
+ s"xsi:nil property is not a valid boolean: '$nilAttrValue' at
${xsr.getLocation}"
)
}
res
@@ -403,10 +384,7 @@ class XMLTextInfosetInputter(input: java.io.InputStream)
extends api.infoset.Inf
xsr.next()
} catch {
case xse: XMLStreamException => {
- val details = "Error: " + Misc
- .getSomeMessage(xse)
- .get + " on line " +
evAlloc.allocate(xsr).getLocation.getLineNumber
- throw new IllegalContentWhereEventExpected(details)
+ throw new IllegalContentWhereEventExpected(xse.getMessage)
}
}
result match {
@@ -416,14 +394,11 @@ class XMLTextInfosetInputter(input: java.io.InputStream)
extends api.infoset.Inf
case SPACE | PROCESSING_INSTRUCTION | COMMENT => // skip these too
case DTD =>
throw new IllegalContentWhereEventExpected(
- "DOCTYPE/DTD Not supported. Error on line " + evAlloc
- .allocate(xsr)
- .getLocation
- .getLineNumber
+ s"DOCTYPE/DTD Not supported. Error at ${xsr.getLocation}"
)
case other =>
throw new IllegalContentWhereEventExpected(
- "Error on line " + evAlloc.allocate(xsr).getLocation.getLineNumber
+ " : " + other
+ s"Unexpected token: ${ErrorConsts.tokenTypeDesc(other)}. Error at
${xsr.getLocation}"
)
}
}