This is an automated email from the ASF dual-hosted git repository.

olabusayo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git


The following commit(s) were added to refs/heads/main by this push:
     new fd4be9255 Add use of DAFFODIL_TDML_API_INFOSETS environment variable
fd4be9255 is described below

commit fd4be9255408438dddb197221edb8e948ed14304
Author: olabusayoT <[email protected]>
AuthorDate: Tue Nov 5 21:20:56 2024 -0500

    Add use of DAFFODIL_TDML_API_INFOSETS environment variable
    
    - Currently, during testing we parse/unparse using both the SAX and 
non-SAX APIs, which leads to confusing behavior such as trace output being 
printed twice for the same test. We also run the parse for all of our infoset 
outputters. With this environment variable, we default to the more efficient 
single infoset outputter (scalaxml) and single API (non-SAX) parse/unparse.
    - The DAFFODIL_TDML_API_INFOSETS environment variable has two options, 
'scala' and 'all', with 'scala' being the default and 'all' preserving the 
existing behavior of running both APIs and all infoset outputters.
    - we convert TDMLInfosetOutputter to a trait so the All and Scala 
subclasses can extend it as well as TeeInfosetOutputter
    - get rid of unused and inaccessible parse function
    - set CI mode to all for regression testing
    - add integration test showing use of scala and all mode
    
    DAFFODIL-2904
---
 .github/workflows/main.yml                         |   1 +
 .../processor/tdml/DaffodilTDMLDFDLProcessor.scala | 200 +++++++++++----------
 .../processor/tdml/TDMLInfosetOutputter.scala      |  48 ++++-
 .../org/apache/daffodil/cliTest/TestCLITdml.scala  |  58 ++++++
 4 files changed, 210 insertions(+), 97 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 332203c3f..55ec18377 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -95,6 +95,7 @@ jobs:
                      github.repository == 'apache/daffodil' &&
                      github.ref == 'refs/heads/main'
                   }}
+      DAFFODIL_TDML_API_INFOSETS: all
 
     steps:
 
diff --git 
a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala
 
b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala
index 6c5a40f85..4ae2a2e84 100644
--- 
a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala
+++ 
b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala
@@ -176,6 +176,8 @@ class DaffodilTDMLDFDLProcessor private (private var dp: 
DataProcessor)
   private def blobPrefix = ""
   private def blobSuffix = ".bin"
 
+  private lazy val tdmlApiInfosetsEnv = 
sys.env.getOrElse("DAFFODIL_TDML_API_INFOSETS", "scala")
+
   override def withDebugging(b: Boolean): DaffodilTDMLDFDLProcessor =
     copy(dp = dp.withDebugging(b))
 
@@ -206,22 +208,16 @@ class DaffodilTDMLDFDLProcessor private (private var dp: 
DataProcessor)
   ): DaffodilTDMLDFDLProcessor =
     copy(dp = dp.withExternalVariables(externalVarBindings))
 
-  def parse(uri: java.net.URI, lengthLimitInBits: Long): TDMLParseResult = {
-    val url = uri.toURL
-    val dpInputStream = url.openStream()
-    val saxInputStream = url.openStream()
-    doParseWithBothApis(dpInputStream, saxInputStream, lengthLimitInBits)
-  }
-
-  def parse(arr: Array[Byte], lengthLimitInBits: Long): TDMLParseResult = {
-    val dpInputStream = new ByteArrayInputStream(arr)
-    val saxInputStream = new ByteArrayInputStream(arr)
-    doParseWithBothApis(dpInputStream, saxInputStream, lengthLimitInBits)
-  }
-
   override def parse(is: java.io.InputStream, lengthLimitInBits: Long): 
TDMLParseResult = {
-    val arr = IOUtils.toByteArray(is)
-    parse(arr, lengthLimitInBits)
+    val (dpInputStream, optSaxInputStream) = if (tdmlApiInfosetsEnv == "all") {
+      val arr = IOUtils.toByteArray(is)
+      val saxInputStream = new ByteArrayInputStream(arr)
+      val dpInputStream = new ByteArrayInputStream(arr)
+      (dpInputStream, Some(saxInputStream))
+    } else {
+      (is, None)
+    }
+    doParse(dpInputStream, optSaxInputStream, lengthLimitInBits)
   }
 
   override def unparse(
@@ -252,104 +248,126 @@ class DaffodilTDMLDFDLProcessor private (private var 
dp: DataProcessor)
     infosetXML: scala.xml.Node,
     outStream: java.io.OutputStream
   ): TDMLUnparseResult = {
-    val bos = new ByteArrayOutputStream()
-    val osw = new OutputStreamWriter(bos, StandardCharsets.UTF_8)
-    scala.xml.XML.write(osw, infosetXML, "UTF-8", xmlDecl = true, null)
-    osw.flush()
-    osw.close()
-    val saxInstream = new ByteArrayInputStream(bos.toByteArray)
-    doUnparseWithBothApis(inputter, saxInstream, outStream)
+    val optSaxInstream = if (tdmlApiInfosetsEnv == "all") {
+      val bos = new ByteArrayOutputStream()
+      val osw = new OutputStreamWriter(bos, StandardCharsets.UTF_8)
+      scala.xml.XML.write(osw, infosetXML, "UTF-8", xmlDecl = true, null)
+      osw.flush()
+      osw.close()
+      val sis = new ByteArrayInputStream(bos.toByteArray)
+      Some(sis)
+    } else {
+      None
+    }
+    doUnparse(inputter, optSaxInstream, outStream)
   }
 
-  def doParseWithBothApis(
+  def doParse(
     dpInputStream: java.io.InputStream,
-    saxInputStream: java.io.InputStream,
+    optSaxInputStream: Option[java.io.InputStream] = None,
     lengthLimitInBits: Long
   ): TDMLParseResult = {
-    val outputter = new TDMLInfosetOutputter()
+    val outputter = if (tdmlApiInfosetsEnv == "all") {
+      new TDMLInfosetOutputterAll
+    } else {
+      new TDMLInfosetOutputterScala
+    }
     outputter.setBlobAttributes(blobDir, blobPrefix, blobSuffix)
 
-    val xri = dp.newXMLReaderInstance
-    val errorHandler = new DaffodilTDMLSAXErrorHandler()
-    val saxOutputStream = new ByteArrayOutputStream()
-    val saxHandler =
-      new DaffodilParseOutputStreamContentHandler(saxOutputStream, pretty = 
false)
-    xri.setContentHandler(saxHandler)
-    xri.setErrorHandler(errorHandler)
-    xri.setProperty(XMLUtils.DAFFODIL_SAX_URN_BLOBDIRECTORY, blobDir)
-    xri.setProperty(XMLUtils.DAFFODIL_SAX_URN_BLOBPREFIX, blobPrefix)
-    xri.setProperty(XMLUtils.DAFFODIL_SAX_URN_BLOBSUFFIX, blobSuffix)
-
     using(InputSourceDataInputStream(dpInputStream)) { dis =>
-      using(InputSourceDataInputStream(saxInputStream)) { sis =>
-        // The length limit here should be the length of the document
-        // under test. Only set a limit when the end of the document
-        // do not match a byte boundary.
-        if (lengthLimitInBits % 8 != 0) {
-          Assert.usage(lengthLimitInBits >= 0)
-          dis.setBitLimit0b(MaybeULong(lengthLimitInBits))
-          sis.setBitLimit0b(MaybeULong(lengthLimitInBits))
-        }
-
-        val actual = dp.parse(dis, outputter)
-        xri.parse(sis)
+      // The length limit here should be the length of the document
+      // under test. Only set a limit when the end of the document
+      // do not match a byte boundary.
+      if (lengthLimitInBits % 8 != 0) {
+        Assert.usage(lengthLimitInBits >= 0)
+        dis.setBitLimit0b(MaybeULong(lengthLimitInBits))
+      }
 
-        if (!actual.isError && !errorHandler.isError) {
-          verifySameParseOutput(outputter.xmlStream, saxOutputStream)
+      val actual = dp.parse(dis, outputter)
+      if (tdmlApiInfosetsEnv == "all") {
+        val saxInputStream = optSaxInputStream.get
+        using(InputSourceDataInputStream(saxInputStream)) { sis =>
+          // The length limit here should be the length of the document
+          // under test. Only set a limit when the end of the document
+          // do not match a byte boundary.
+          if (lengthLimitInBits % 8 != 0) {
+            Assert.usage(lengthLimitInBits >= 0)
+            sis.setBitLimit0b(MaybeULong(lengthLimitInBits))
+          }
+
+          val xri = dp.newXMLReaderInstance
+          val errorHandler = new DaffodilTDMLSAXErrorHandler()
+          val saxOutputStream = new ByteArrayOutputStream()
+          val saxHandler =
+            new DaffodilParseOutputStreamContentHandler(saxOutputStream, 
pretty = false)
+          xri.setContentHandler(saxHandler)
+          xri.setErrorHandler(errorHandler)
+          xri.setProperty(XMLUtils.DAFFODIL_SAX_URN_BLOBDIRECTORY, blobDir)
+          xri.setProperty(XMLUtils.DAFFODIL_SAX_URN_BLOBPREFIX, blobPrefix)
+          xri.setProperty(XMLUtils.DAFFODIL_SAX_URN_BLOBSUFFIX, blobSuffix)
+
+          xri.parse(sis)
+
+          if (!actual.isError && !errorHandler.isError) {
+            verifySameParseOutput(outputter.xmlStream, saxOutputStream)
+          }
+          val dpParseDiag = actual.getDiagnostics.map(_.getMessage())
+          val saxParseDiag = errorHandler.getDiagnostics.map(_.getMessage())
+          verifySameDiagnostics(dpParseDiag, saxParseDiag)
         }
-        val dpParseDiag = actual.getDiagnostics.map(_.getMessage())
-        val saxParseDiag = errorHandler.getDiagnostics.map(_.getMessage())
-        verifySameDiagnostics(dpParseDiag, saxParseDiag)
-
-        new DaffodilTDMLParseResult(actual, outputter)
       }
+      new DaffodilTDMLParseResult(actual, outputter)
     }
   }
 
-  def doUnparseWithBothApis(
+  def doUnparse(
     dpInputter: TDMLInfosetInputter,
-    saxInputStream: java.io.InputStream,
+    optSaxInputStream: Option[java.io.InputStream] = None,
     dpOutputStream: java.io.OutputStream
   ): DaffodilTDMLUnparseResult = {
 
     val dpOutputChannel = java.nio.channels.Channels.newChannel(dpOutputStream)
-    val saxOutputStream = new ByteArrayOutputStream
-    val saxOutputChannel = 
java.nio.channels.Channels.newChannel(saxOutputStream)
-    val unparseContentHandler = dp.newContentHandlerInstance(saxOutputChannel)
-    unparseContentHandler.enableResolutionOfRelativeInfosetBlobURIs()
-    val xmlReader = DaffodilSAXParserFactory().newSAXParser.getXMLReader
-    xmlReader.setContentHandler(unparseContentHandler)
-    xmlReader.setFeature(XMLUtils.SAX_NAMESPACES_FEATURE, true)
-    xmlReader.setFeature(XMLUtils.SAX_NAMESPACE_PREFIXES_FEATURE, true)
-
     val actualDP = dp.unparse(dpInputter, 
dpOutputChannel).asInstanceOf[UnparseResult]
     dpOutputChannel.close()
-    // kick off SAX Unparsing
-    try {
-      xmlReader.parse(new InputSource(saxInputStream))
-    } catch {
-      case e: DaffodilUnhandledSAXException =>
-        // In the case of an unexpected errors, catch and throw as 
TDMLException
-        throw TDMLException("Unexpected error during SAX Unparse:" + e, None)
-      case _: DaffodilUnparseErrorSAXException =>
-      // do nothing as unparseResult and its diagnostics will be handled below
-    }
 
-    val actualSAX = unparseContentHandler.getUnparseResult
-    saxOutputChannel.close()
-    if (!actualDP.isError && !actualSAX.isError) {
-      val dpis = new ByteArrayInputStream(
-        dpOutputStream.asInstanceOf[ByteArrayOutputStream].toByteArray
-      )
-      if (actualDP.isScannable && actualSAX.isScannable) {
-        VerifyTestCase.verifyTextData(dpis, saxOutputStream, 
actualSAX.encodingName, None)
-      } else {
-        VerifyTestCase.verifyBinaryOrMixedData(dpis, saxOutputStream, None)
+    if (tdmlApiInfosetsEnv == "all") {
+      val saxInputStream = optSaxInputStream.get
+      val saxOutputStream = new ByteArrayOutputStream
+      val saxOutputChannel = 
java.nio.channels.Channels.newChannel(saxOutputStream)
+      val unparseContentHandler = 
dp.newContentHandlerInstance(saxOutputChannel)
+      unparseContentHandler.enableResolutionOfRelativeInfosetBlobURIs()
+      val xmlReader = DaffodilSAXParserFactory().newSAXParser.getXMLReader
+      xmlReader.setContentHandler(unparseContentHandler)
+      xmlReader.setFeature(XMLUtils.SAX_NAMESPACES_FEATURE, true)
+      xmlReader.setFeature(XMLUtils.SAX_NAMESPACE_PREFIXES_FEATURE, true)
+
+      // kick off SAX Unparsing
+      try {
+        xmlReader.parse(new InputSource(saxInputStream))
+      } catch {
+        case e: DaffodilUnhandledSAXException =>
+          // In the case of an unexpected errors, catch and throw as 
TDMLException
+          throw TDMLException("Unexpected error during SAX Unparse:" + e, None)
+        case _: DaffodilUnparseErrorSAXException =>
+        // do nothing as unparseResult and its diagnostics will be handled 
below
+      }
+
+      val actualSAX = unparseContentHandler.getUnparseResult
+      saxOutputChannel.close()
+      if (!actualDP.isError && !actualSAX.isError) {
+        val dpis = new ByteArrayInputStream(
+          dpOutputStream.asInstanceOf[ByteArrayOutputStream].toByteArray
+        )
+        if (actualDP.isScannable && actualSAX.isScannable) {
+          VerifyTestCase.verifyTextData(dpis, saxOutputStream, 
actualSAX.encodingName, None)
+        } else {
+          VerifyTestCase.verifyBinaryOrMixedData(dpis, saxOutputStream, None)
+        }
       }
+      val dpUnparseDiag = actualDP.getDiagnostics.map(_.getMessage())
+      val saxUnparseDiag = actualSAX.getDiagnostics.map(_.getMessage())
+      verifySameDiagnostics(dpUnparseDiag, saxUnparseDiag)
     }
-    val dpUnparseDiag = actualDP.getDiagnostics.map(_.getMessage())
-    val saxUnparseDiag = actualSAX.getDiagnostics.map(_.getMessage())
-    verifySameDiagnostics(dpUnparseDiag, saxUnparseDiag)
 
     new DaffodilTDMLUnparseResult(actualDP, dpOutputStream)
   }
@@ -408,11 +426,11 @@ class DaffodilTDMLDFDLProcessor private (private var dp: 
DataProcessor)
 final class DaffodilTDMLParseResult(actual: DFDL.ParseResult, outputter: 
TDMLInfosetOutputter)
   extends TDMLParseResult {
 
-  override def getResult: Node = outputter.getResult()
+  override def getResult: Node = outputter.getResult
 
   override def getBlobPaths: Seq[Path] = outputter.getBlobPaths()
 
-  def inputter = outputter.toInfosetInputter()
+  def inputter = outputter.toInfosetInputter
 
   override def isProcessingError: Boolean = actual.isProcessingError
 
diff --git 
a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala
 
b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala
index ea569918b..40a8e495b 100644
--- 
a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala
+++ 
b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala
@@ -19,7 +19,10 @@ package org.apache.daffodil.processor.tdml
 
 import java.io.ByteArrayInputStream
 import java.io.ByteArrayOutputStream
+import java.nio.charset.Charset
+import scala.xml.Node
 
+import org.apache.daffodil.runtime1.infoset.InfosetOutputter
 import org.apache.daffodil.runtime1.infoset.JDOMInfosetInputter
 import org.apache.daffodil.runtime1.infoset.JDOMInfosetOutputter
 import org.apache.daffodil.runtime1.infoset.JsonInfosetInputter
@@ -33,10 +36,32 @@ import 
org.apache.daffodil.runtime1.infoset.W3CDOMInfosetOutputter
 import org.apache.daffodil.runtime1.infoset.XMLTextInfosetInputter
 import org.apache.daffodil.runtime1.infoset.XMLTextInfosetOutputter
 
-class TDMLInfosetOutputter
+class TDMLInfosetOutputterScala
+  extends {
+    private val scalaOut = new ScalaXMLInfosetOutputter()
+    private val outputters: Seq[InfosetOutputter] = Seq(scalaOut)
+  }
+  with TeeInfosetOutputter(outputters: _*)
+  with TDMLInfosetOutputter {
+
+  override def getResult: Node = scalaOut.getResult
+
+  override lazy val xmlStream: ByteArrayOutputStream = {
+    val bos = new ByteArrayOutputStream()
+    bos.write(getResult.toString().getBytes(Charset.defaultCharset()))
+    bos
+  }
+
+  override def toInfosetInputter: TDMLInfosetInputter = {
+    val scalaIn = new ScalaXMLInfosetInputter(scalaOut.getResult)
+    new TDMLInfosetInputter(scalaIn, Seq())
+  }
+}
+
+class TDMLInfosetOutputterAll
   extends {
     private val jsonStream = new ByteArrayOutputStream()
-    val xmlStream = new ByteArrayOutputStream()
+    override val xmlStream = new ByteArrayOutputStream()
 
     private val scalaOut = new ScalaXMLInfosetOutputter()
     private val jdomOut = new JDOMInfosetOutputter()
@@ -44,13 +69,15 @@ class TDMLInfosetOutputter
     private val jsonOut = new JsonInfosetOutputter(jsonStream, false)
     private val xmlOut = new XMLTextInfosetOutputter(xmlStream, false)
 
-    private val outputters = Seq(xmlOut, scalaOut, jdomOut, w3cdomOut, jsonOut)
+    private val outputters: Seq[InfosetOutputter] =
+      Seq(xmlOut, scalaOut, jdomOut, w3cdomOut, jsonOut)
   }
-  with TeeInfosetOutputter(outputters: _*) {
+  with TeeInfosetOutputter(outputters: _*)
+  with TDMLInfosetOutputter {
 
-  def getResult() = scalaOut.getResult
+  override def getResult: Node = scalaOut.getResult
 
-  def toInfosetInputter() = {
+  override def toInfosetInputter: TDMLInfosetInputter = {
     val scalaIn = new ScalaXMLInfosetInputter(scalaOut.getResult)
     val jdomIn = new JDOMInfosetInputter(jdomOut.getResult)
     val w3cdomIn = new W3CDOMInfosetInputter(w3cdomOut.getResult)
@@ -63,3 +90,12 @@ class TDMLInfosetOutputter
     new TDMLInfosetInputter(scalaIn, Seq(jdomIn, w3cdomIn, jsonIn, xmlIn, 
nullIn))
   }
 }
+
+trait TDMLInfosetOutputter extends InfosetOutputter {
+
+  def xmlStream: ByteArrayOutputStream
+
+  def getResult: Node
+
+  def toInfosetInputter: TDMLInfosetInputter
+}
diff --git 
a/daffodil-test-integration/src/test/scala/org/apache/daffodil/cliTest/TestCLITdml.scala
 
b/daffodil-test-integration/src/test/scala/org/apache/daffodil/cliTest/TestCLITdml.scala
new file mode 100644
index 000000000..7435696f2
--- /dev/null
+++ 
b/daffodil-test-integration/src/test/scala/org/apache/daffodil/cliTest/TestCLITdml.scala
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.cliTest
+
+import org.apache.daffodil.cli.Main.ExitCode
+import org.apache.daffodil.cli.cliTest.Util._
+
+import org.junit.Test
+
+class TestCLITdml {
+
+  @Test def test_CLI_Tdml_Trace_singleTest1(): Unit = {
+    val tdml = path(
+      
"daffodil-test/src/test/resources/org/apache/daffodil/section06/entities/Entities.tdml"
+    )
+
+    val envs = Map("DAFFODIL_TDML_API_INFOSETS" -> "all")
+
+    runCLI(args"test -i -t $tdml byte_entities_6_08", envs = envs) { cli =>
+      // legacy parse
+      cli.expect("parser: <Element name='e3'>")
+      // sax parse
+      cli.expect("parser: <Element name='e3'>")
+      cli.expect("[Pass] byte_entities_6_08")
+    }(ExitCode.Success)
+  }
+
+  @Test def test_CLI_Tdml_Trace_singleTest2(): Unit = {
+    val tdml = path(
+      
"daffodil-test/src/test/resources/org/apache/daffodil/section06/entities/Entities.tdml"
+    )
+
+    val envs = Map("DAFFODIL_TDML_API_INFOSETS" -> "scala")
+
+    runCLI(args"test -i -t $tdml byte_entities_6_08", envs = envs) { cli =>
+      // parse
+      cli.expect("parser: <Element name='e3'>")
+      // unparse
+      cli.expect("parser: not available")
+      cli.expect("[Pass] byte_entities_6_08")
+    }(ExitCode.Success)
+  }
+}

Reply via email to