This is an automated email from the ASF dual-hosted git repository.

mbeckerle pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git


The following commit(s) were added to refs/heads/main by this push:
     new f2a9bd652 Improve diagnostic from delimiter scanning
f2a9bd652 is described below

commit f2a9bd652b51d98c34d5055c16dfe79ae22239b7
Author: Michael Beckerle <[email protected]>
AuthorDate: Tue Mar 26 18:22:34 2024 -0400

    Improve diagnostic from delimiter scanning
    
    DAFFODIL-2345
---
 .../daffodil/cli/cliTest/TestCLIParsing.scala      |   5 +-
 .../main/scala/org/apache/daffodil/io/Dump.scala   |   6 +-
 .../org/apache/daffodil/io/TestDumpDisplay.scala   |  21 ++-
 .../scala/org/apache/daffodil/lib/util/Misc.scala  | 205 ++++++++++-----------
 .../org/apache/daffodil/lib/xml/XMLUtils.scala     |   4 +-
 .../daffodil/runtime1/processors/dfa/Runtime.scala |  15 ++
 .../processors/parsers/DelimiterParsers.scala      |  42 ++++-
 .../processors/parsers/SeparatedParseHelper.scala  |   2 +-
 .../runtime1/parser/TestCharsetBehavior.scala      |   2 +-
 .../org/apache/daffodil/tdml/TDMLRunner.scala      |   4 +-
 .../delimiter_properties/DelimiterProperties.tdml  |  26 ++-
 .../sequence_groups/SequenceGroupDelimiters.tdml   |   2 +-
 12 files changed, 192 insertions(+), 142 deletions(-)

diff --git 
a/daffodil-cli/src/test/scala/org/apache/daffodil/cli/cliTest/TestCLIParsing.scala
 
b/daffodil-cli/src/test/scala/org/apache/daffodil/cli/cliTest/TestCLIParsing.scala
index 1de7a5397..b68e32bf0 100644
--- 
a/daffodil-cli/src/test/scala/org/apache/daffodil/cli/cliTest/TestCLIParsing.scala
+++ 
b/daffodil-cli/src/test/scala/org/apache/daffodil/cli/cliTest/TestCLIParsing.scala
@@ -151,14 +151,15 @@ class TestCLIParsing {
     }(ExitCode.BadExternalVariable)
   }
 
-  @Test def test_CLI_Parsing_SimpleParse_DFDL1197_fix(): Unit = {
+  @Test
+  def test_CLI_Parsing_SimpleParse_DFDL1197_fix(): Unit = {
     val schema = path(
       
"daffodil-test/src/test/resources/org/apache/daffodil/section12/delimiter_properties/testOptionalInfix.dfdl.xsd",
     )
 
     runCLI(args"-vv parse -s $schema") { cli =>
       cli.sendLine("1/3", inputDone = true)
-      cli.expectErr("<Sequence><Separator/><RepMinMax name='s1'>")
+      cli.expectErr("<Sequence><separator/><RepMinMax name='s1'>")
     }(ExitCode.LeftOverData)
   }
 
diff --git a/daffodil-io/src/main/scala/org/apache/daffodil/io/Dump.scala 
b/daffodil-io/src/main/scala/org/apache/daffodil/io/Dump.scala
index 049875727..21a84f839 100644
--- a/daffodil-io/src/main/scala/org/apache/daffodil/io/Dump.scala
+++ b/daffodil-io/src/main/scala/org/apache/daffodil/io/Dump.scala
@@ -640,7 +640,7 @@ class DataDumper {
                 homogenizeChars(uCodePoint)
               }
             } else {
-              homogenizeChars(Misc.remapCodepointToVisibleGlyph(allChars(0)))
+              
homogenizeChars(Misc.remapControlOrLineEndingToVisibleGlyphs(allChars(0)))
             }
           remapped = r
           nCols = n
@@ -661,8 +661,8 @@ class DataDumper {
         //
         // FIXME: This will be really broken for EBCDIC-based encodings. Pass 
the encoding
         // so that the glyph routine can be ascii/ebcdic sensitive.
-        val remapped = Misc.remapByteToVisibleGlyph(byteValue)
-        (remapped.toChar.toString, 1, 1)
+        val remapped = Misc.remapOneByteToVisibleGlyph(byteValue)
+        (remapped.toString, 1, 1)
       }
     }
   }
diff --git 
a/daffodil-io/src/test/scala/org/apache/daffodil/io/TestDumpDisplay.scala 
b/daffodil-io/src/test/scala/org/apache/daffodil/io/TestDumpDisplay.scala
index e1c57aec1..3d98580ee 100644
--- a/daffodil-io/src/test/scala/org/apache/daffodil/io/TestDumpDisplay.scala
+++ b/daffodil-io/src/test/scala/org/apache/daffodil/io/TestDumpDisplay.scala
@@ -17,9 +17,6 @@
 
 package org.apache.daffodil.io
 
-import java.nio.ByteBuffer
-import java.nio.CharBuffer
-
 import org.apache.daffodil.lib.util.Misc
 import org.apache.daffodil.lib.xml.XMLUtils
 
@@ -41,12 +38,8 @@ class TestDumpDisplay {
    */
   @Test def testAllPrintableChars() = {
 
-    // val bytes = 0 to 255 map { _.toByte }
-    val bb = ByteBuffer.allocate(256)
-    (0 to 255).foreach { n => bb.put(n, n.toByte) }
-    val cb = CharBuffer.allocate(256)
-    Misc.remapBytesToVisibleGlyphs(bb, cb)
-    val res = cb.toString
+    val bytes: Array[Byte] = (0 to 255).map { _.toByte }.toArray
+    val res = Misc.remapBytesToStringOfVisibleGlyphs(bytes)
     val exp =
       //
       // C0 Controls - use unicode control picture characters.
@@ -72,7 +65,15 @@ class TestDumpDisplay {
         "¡¢£¤¥¦§¨©ª«¬" +
         "-" + // 0xAD soft hyphen mapped to regular hyphen (because soft 
hyphen seems to be a zero-width in many fonts.
         
"®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"
-    assertEquals(exp, res)
+    assertEquals(exp.length, res.length)
+    val sb = new StringBuilder()
+    ((exp.zip(res)).zip(0 to res.length)).foreach { case ((exp, res), i) =>
+      if (exp != res) {
+        sb.append(s"At index $i expected '$exp' but actual was '$res'\n")
+      }
+    }
+    val msg = sb.toString()
+    if (msg.nonEmpty) fail(msg)
   }
 
 }
diff --git 
a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/util/Misc.scala 
b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/util/Misc.scala
index ff828f49a..2e78cefbb 100644
--- a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/util/Misc.scala
+++ b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/util/Misc.scala
@@ -24,9 +24,9 @@ import java.io.IOException
 import java.net.URI
 import java.net.URLClassLoader
 import java.nio.ByteBuffer
-import java.nio.CharBuffer
 import java.nio.channels.ReadableByteChannel
 import java.nio.channels.WritableByteChannel
+import java.nio.charset.Charset
 import java.nio.charset.CodingErrorAction
 import java.nio.charset.StandardCharsets
 import java.nio.charset.{ Charset => JavaCharset }
@@ -38,6 +38,8 @@ import scala.io.Source
 import org.apache.daffodil.lib.equality._
 import org.apache.daffodil.lib.exceptions.Assert
 
+import passera.unsigned.UByte
+
 /**
  * Various reusable utilities that I couldn't easily find a better place for.
  */
@@ -424,7 +426,7 @@ object Misc {
 
   /**
    * This function creates a representation of data which doesn't
-   * contain any whitespace characters that jump around the screen.
+   * contain any control or whitespace characters that jump around the screen.
    * It replaces those with characters that have a simple glyph.
    *
    * The point of this is when you display the stream of data for
@@ -434,13 +436,40 @@ object Misc {
    * Replacing these with the picture characters (designed for this purpose)
    * in the unicode x2400 block helps.
    */
-  def remapControlsAndLineEndingsToVisibleGlyphs(s: String) = {
-    s.map { remapControlOrLineEndingToVisibleGlyphs(_) }.mkString
+  def remapStringToVisibleGlyphs(s: String) =
+    nonGlyphToVisibleGlyphsRemapper.remap(s)
+
+  object nonGlyphToVisibleGlyphsRemapper extends CharacterSetRemapper {
+    override protected def remap(prev: Char, curr: Char, next: Char): Int =
+      remapControlOrLineEndingToVisibleGlyphs(curr)
   }
 
-  def remapControlOrLineEndingToVisibleGlyphs(c: Char) = {
+  /**
+   * For debugger displays, data dumps, etc.
+   *
+   * Control characters, line-endings, spaces, and various others do not have 
a glyph that is displayed.
+   * They also can cause text to be split across lines, bells to ring, 
characters to be overwritten, etc.
+   *
+   * Convert to a character that has a glyph. Unicode provides some 
control-picture
+   * characters for this purpose. Note that this by-definition loses 
information, and many characters
+   * will map to the unicode replacement character. It is expected this 
display would be complemented
+   * by a hex dump or other means of understanding the actual representation 
of these remapped
+   * characters.
+   *
+   * Keep in mind this is a Unicode to Unicode transformation. It is not 
starting from byte values.
+   * See `byteToChar(b)` for how to get from raw byte values to unicode chars.
+   * @param c a unicode character that may or may not have a glyph.
+   * @param replaceControlPictures when true means the Unicode control 
pictures characters are replaced by
+   *                               the Unicode replacement character. When 
false these characters are preserved.
+   *                               Defaults to false.
+   * @return a unicode character that definitely has a glyph.
+   */
+  def remapControlOrLineEndingToVisibleGlyphs(
+    c: Char,
+    replaceControlPictures: Boolean = false,
+  ): Char = {
     val URC =
-      0x2426 // Unicode control picture character for substutition (also looks 
like arabic q-mark)
+      0x2426 // Unicode control picture character for substitution (also looks 
like arabic q-mark)
     val code = c.toInt match {
       //
       // C0 Control pictures
@@ -448,8 +477,15 @@ object Misc {
       case 0x20 => 0x2423 // For space we use the SP we use the ␣ (Unicode 
OPEN BOX)
       case 0x7f => 0x2421 // DEL pic isn't at 0x247F, it's at 0x2421
       //
+      // We remap these into the Unicode Latin Extended B codepoints by
+      // adding 0x100 to their basic value.
+      //
+      case n if (n >= 0x80 && n <= 0x9f) =>
+        n + 0x100
+      case 0xa0 => 0x2422 // non-break space => ␢ (blank symbol or little b 
with stroke)
+      case 0xad => 0x002d // soft hyphen => hyphen
+      //
       // Unicode separators & joiners
-      case 0x00a0 => URC // no-break space
       case 0x200b => URC // zero width space
       case 0x2028 => URC // line separator
       case 0x2029 => URC // paragraph separator
@@ -477,123 +513,84 @@ object Misc {
       //
       //
       // Special case - if incoming character is one of the glyph
-      // characters we're remapping onto, then change to URC
+      // characters we're remapping onto, then we could issue
+      // a substitution character, but there are things that depend
+      // on these being preserved. So we have a flag to control this.
       //
-      case n if (n > 0x2400 && n < 0x2423) => URC
+      case n if (n > 0x2400 && n < 0x2423 && replaceControlPictures) => URC
       case _ => c
     }
     code.toChar
   }
 
-  private val bytesCharset =
-    JavaCharset.forName("windows-1252") // same as iso-8859-1 but has a few 
more glyphs.
-  private val bytesDecoder = {
-    val decoder = bytesCharset.newDecoder()
-    decoder.onMalformedInput(CodingErrorAction.REPLACE)
-    decoder.onUnmappableCharacter(CodingErrorAction.REPLACE)
-    decoder
+  private lazy val byteToCharTable = {
+    val cs = Charset.forName("windows-1252")
+    val dec = cs
+      .newDecoder()
+      .onUnmappableCharacter(CodingErrorAction.REPLACE)
+      .onMalformedInput(CodingErrorAction.REPORT)
+    val bb = ByteBuffer.wrap((0 to 255).map { i => i.toByte }.toArray)
+    val cb = dec.decode(bb)
+    assert(cb.position == 0)
+    assert(cb.limit == 256)
+    // These 5 are unmapped by Windows-1252 but we want to turn any
+    // byte into a legit character. So for these we add 0x100
+    // to get unicode codepoints.
+    cb.put(0x81, 0x181.toChar)
+    cb.put(0x8d, 0x18d.toChar)
+    cb.put(0x8f, 0x18f.toChar)
+    cb.put(0x90, 0x190.toChar)
+    cb.put(0x9d, 0x19d.toChar)
+    val res = cb.toString
+    res
   }
 
   /**
-   * Used when creating a debugging dump of data, where the data might be 
binary stuff
-   * but we want to show some sort of glyph for each byte.
+   * Convert a byte to a unicode character assuming the byte is iso-8859-1
+   * (or really, windows-1252 which has a few more glyph chars but is otherwise
+   * the same as iso-8859-1)
    *
-   * This uses windows-1252 for all the places it has glyphs, and other unicode
-   * glyph characters to replace the control characters and unused characters.
+   * This is a super pain to do using Java charsets because they
+   * don't provide an API to convert one character, only byte buffers
+   * into char buffers.
    *
-   * This allows printing a data dump to the screen, without worry that the 
control
-   * characters will ring bells or cause the text to jump around, and unmapped
-   * characters will not look like spaces, nor all look like the same unicde 
replacement
-   * character.
+   * So we just use a lookup table.
+   * @param b a byte containing a code point of windows-1252 encoding
+   * @return a unicode equivalent character
    */
-  def remapBytesToVisibleGlyphs(bb: ByteBuffer, cb: CharBuffer): Unit = {
-    val numBytes = bb.remaining()
-    bytesDecoder.decode(bb, cb, true)
-    cb.flip
-    var i = 0
-    while (i < numBytes) {
-      val newCodepoint = remapOneByteToVisibleGlyph(bb.get(i))
-      if (newCodepoint != -1) {
-        cb.put(i, newCodepoint.toChar)
-      }
-      i += 1
-    }
-  }
-
-  /**
-   * For unicode codepoints in the range 0 to 255, or signed -128 to 127,
-   * make sure there is a visible glyph.
-   */
-  def remapCodepointToVisibleGlyph(codepoint: Int): Int = {
-    if (codepoint > 255 || codepoint < -128) return codepoint
-    val b = codepoint.toByte
-    val r = remapOneByteToVisibleGlyph(b)
-    if (r == -1) codepoint else r
-  }
-
-  def remapStringToVisibleGlyphs(s: String) = {
-    s.map { c => remapCodepointToVisibleGlyph(c.toInt).toChar }
-  }
-
-  def remapBytesToStringOfVisibleGlyphs(ba: Array[Byte]): String = {
-    ba.map { b => remapCodepointToVisibleGlyph(b.toInt).toChar }.mkString
-  }
-
-  def remapByteToVisibleGlyph(b: Byte): Int = {
-    val bb = ByteBuffer.allocate(1)
-    bb.put(0, b)
-    val cb = CharBuffer.allocate(1)
-    remapBytesToVisibleGlyphs(bb, cb)
-    cb.get(0).toChar.toInt
+  def byteToChar(b: Byte): Char = {
+    byteToCharTable(UByte(b).toInt)
   }
 
   /**
-   * Remaps a byte to a unicode codepoint for a visible picture, or -1 if
-   * no remapping is needed.
-   *
-   * A difficulty is that there do not seem to be generally available Unicode 
fonts
-   * which are truly monospaced for every Unicode character. So since we are
-   * trying to produce data dumps that are monospaced, the tabular layout is 
off a bit.
-   *
-   * Even if there was such a font, it wouldn't be the default font.
+   * This function creates a representation of data which doesn't
+   * contain any control or whitespace characters that jump around the screen.
+   * It replaces those with characters that have a simple glyph.
    *
-   * Courier New seems to work well. It is monospaced for every character we 
use
-   * in this remap stuff. But not for the "double wide" Kanji or other wide 
oriental
-   * characters.
+   * The point of this is when you display the stream of data for
+   * debugging, or for a diagnostic message, and it is mostly single-byte text
+   * characters, then the characters which control position like CR, LF, FF,
+   * VT, HT, BS, etc. all make it hard to figure out what is going on.
+   * Replacing these with the picture characters (designed for this purpose)
+   * in the unicode x2400 block helps.
    */
-  private def remapOneByteToVisibleGlyph(b: Byte): Int = {
-    Bits.asUnsignedByte(b) match {
-      //
-      // replace C0 controls with unicode control pictures
-      //
-      case n if (n <= 0x1f) => n + 0x2400
-      //
-      // replace space and DEL with control pictures
-      //
-      case 0x20 => 0x2423 // For space we use the SP we use the ␣ (Unicode 
OPEN BOX)
-      case 0x7f => 0x2421 // DEL pic isn't at 0x247F, it's at 0x2421
-      //
-      // replace undefined characters in the C1 control space with
-      // glyph characters. These are the only codepoints in the C1
-      // space which do not have a glyph defined by windows-1252
-      //
-      // We remap these into the Unicode Latin Extended B codepoints by
-      // adding 0x100 to their basic value.
-      //
-      case 0x81 => 0x0181
-      case 0x8d => 0x018d
-      case 0x8f => 0x018f
-      case 0x90 => 0x0190
-      case 0x9d => 0x019d
-      //
-      // Non-break space
-      //
-      case 0xa0 => 0x2422 // little b with stroke
-      case 0xad => 0x002d // soft hyphen becomes hyphen
-      case regular => -1 // all other cases -1 means we just use the regular 
character glyph.
+  def remapBytesToStringOfVisibleGlyphs(ba: Array[Byte]): String = {
+    val len = ba.length
+    if (len == 0) ""
+    else {
+      val sb = new StringBuilder(ba.length)
+      var i: Int = 0
+      while (i < ba.length) {
+        sb.append(remapControlOrLineEndingToVisibleGlyphs(byteToChar(ba(i))))
+        i += 1
+      }
+      sb.toString()
     }
   }
 
+  def remapOneByteToVisibleGlyph(b: Byte) =
+    remapControlOrLineEndingToVisibleGlyphs(byteToChar(b))
+
   /**
    * True if this charset encoding is suitable for display using the
    * all-visible-glyph stuff above.
diff --git 
a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala 
b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala
index 280d0e118..5d6996c55 100644
--- a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala
+++ b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala
@@ -1136,10 +1136,10 @@ Differences were (path, expected, actual):
       // prefix of the other and index is where the prefix ends, or index is
       // the first difference found. Either way, we can safely use slice() to
       // get at most some number of characters at that index for context.
-      val contextA = Misc.remapControlsAndLineEndingsToVisibleGlyphs(
+      val contextA = Misc.remapStringToVisibleGlyphs(
         dataA.slice(index, index + CHARS_TO_SHOW_AFTER_DIFF),
       )
-      val contextB = Misc.remapControlsAndLineEndingsToVisibleGlyphs(
+      val contextB = Misc.remapStringToVisibleGlyphs(
         dataB.slice(index, index + CHARS_TO_SHOW_AFTER_DIFF),
       )
       val path = zPath + ".charAt(" + (index + 1) + ")"
diff --git 
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/dfa/Runtime.scala
 
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/dfa/Runtime.scala
index 263b1bb90..899bcc45f 100644
--- 
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/dfa/Runtime.scala
+++ 
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/dfa/Runtime.scala
@@ -19,6 +19,7 @@ package org.apache.daffodil.runtime1.processors.dfa
 
 import org.apache.daffodil.lib.exceptions.Assert
 import org.apache.daffodil.lib.exceptions.SchemaFileLocation
+import org.apache.daffodil.lib.util.Misc
 import org.apache.daffodil.runtime1.processors.parsers.DelimiterTextType
 
 /**
@@ -133,6 +134,17 @@ trait DFAField extends DFA {
   final override def run(r: Registers): Unit = runLoop(r, DFA.EndOfData, 
StateKind.EndOfData)
 }
 
+object DFADelimiter {
+  private val controlOrWhitespace = "\\p{C}|\\p{Z}".r
+
+  private def containsCtrlOrWS(s: String) = 
controlOrWhitespace.findFirstMatchIn(s).isDefined
+
+  def strForDiagnostic(s: String) =
+    if (containsCtrlOrWS(s))
+      s"'$s' ('${Misc.remapStringToVisibleGlyphs(s)}')"
+    else s"'$s'"
+}
+
 trait DFADelimiter extends DFA {
   def delimType: DelimiterTextType.Type
   def lookingFor: String
@@ -147,6 +159,9 @@ trait DFADelimiter extends DFA {
   final val isES = lookingFor == "%ES;"
 
   def unparseValue: String
+
+  lazy val strForDiagnostic: String = DFADelimiter.strForDiagnostic(lookingFor)
+
 }
 
 /**
diff --git 
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/DelimiterParsers.scala
 
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/DelimiterParsers.scala
index 058d457eb..d8be2ef01 100644
--- 
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/DelimiterParsers.scala
+++ 
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/DelimiterParsers.scala
@@ -28,10 +28,14 @@ import 
org.apache.daffodil.runtime1.processors.LocalTypedDelimiterIterator
 import 
org.apache.daffodil.runtime1.processors.RemoteTerminatingMarkupAndLocalTypedDelimiterIterator
 import org.apache.daffodil.runtime1.processors.TermRuntimeData
 import org.apache.daffodil.runtime1.processors.dfa.DFADelimiter
+import org.apache.daffodil.runtime1.processors.dfa.ParseResult
 import org.apache.daffodil.runtime1.processors.dfa.TextParser
 
 object DelimiterTextType extends Enum {
-  abstract sealed trait Type extends EnumValueType
+  abstract sealed trait Type extends EnumValueType {
+    override lazy val toString =
+      Misc.initialLowerCase(getClass().getSimpleName.replace("$", ""))
+  }
   case object Initiator extends Type
   case object Separator extends Type
   case object Terminator extends Type
@@ -73,6 +77,34 @@ class DelimiterTextParser(
     foundLocalDFAIndex >= 0
   }
 
+  private def localDelimiters(state: PState): Seq[DFADelimiter] = {
+    val localIndexStart = state.mpstate.delimitersLocalIndexStack.top
+    val inScopeDelimiters = state.mpstate.delimiters
+    val res = inScopeDelimiters.slice(localIndexStart, 
inScopeDelimiters.length)
+    res
+  }
+
+  private def didNotFindExpectedDelimiter(foundDelimiter: ParseResult, start: 
PState): Unit = {
+    val localDelims = localDelimiters(start)
+    val foundDFA = foundDelimiter.matchedDFAs(0)
+    PE(
+      start,
+      """Found enclosing delimiter: %s during scan for local delimiter(s): %s.
+         | The expected delimiter(s) were: %s.
+         | The enclosing delimiter was from %s %s.
+         |""".stripMargin,
+      foundDFA.strForDiagnostic,
+      localDelims.map { d => d.strForDiagnostic }.mkString(", "),
+      localDelims
+        .map { d =>
+          s"  ${d.delimType.toString} ${d.strForDiagnostic} from ${d.location} 
${d.location.locationDescription}."
+        }
+        .mkString("\n", "\n", ""),
+      foundDFA.location,
+      foundDFA.location.locationDescription,
+    )
+  }
+
   override def parse(start: PState): Unit = {
 
     val maybeDelimIter =
@@ -108,15 +140,9 @@ class DelimiterTextParser(
     if (foundDelimiter.isDefined) {
       if (!containsLocalMatch(foundDelimiter.get.matchedDFAs, start)) {
         // It was a remote delimiter but we should have found a local one.
-        PE(
-          start,
-          "Found out of scope delimiter: '%s' '%s'",
-          foundDelimiter.get.matchedDFAs(0).lookingFor,
-          
Misc.remapStringToVisibleGlyphs(foundDelimiter.get.matchedDelimiterValue.get),
-        )
+        didNotFindExpectedDelimiter(foundDelimiter.get, start)
         return
       }
-
       // Consume the found local delimiter but also check if it was supposed 
to match
       // a non-zero number of bits and throw a runtime SDE if necessary
       val nChars = foundDelimiter.get.matchedDelimiterValue.get.length
diff --git 
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/SeparatedParseHelper.scala
 
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/SeparatedParseHelper.scala
index b120d5463..78db82041 100644
--- 
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/SeparatedParseHelper.scala
+++ 
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/SeparatedParseHelper.scala
@@ -63,7 +63,7 @@ sealed abstract class SeparatorParseHelper(
           cause,
         )
       case _ =>
-        sep.PE(pstate, "Failed to parse %s separator. Cause: %s.", kind, cause)
+        sep.PE(pstate, "Failed to find %s separator. Cause: %s.", kind, cause)
     }
   }
 }
diff --git 
a/daffodil-runtime1/src/test/scala/org/apache/daffodil/runtime1/parser/TestCharsetBehavior.scala
 
b/daffodil-runtime1/src/test/scala/org/apache/daffodil/runtime1/parser/TestCharsetBehavior.scala
index d26147534..77a2912c0 100644
--- 
a/daffodil-runtime1/src/test/scala/org/apache/daffodil/runtime1/parser/TestCharsetBehavior.scala
+++ 
b/daffodil-runtime1/src/test/scala/org/apache/daffodil/runtime1/parser/TestCharsetBehavior.scala
@@ -476,7 +476,7 @@ class TestUnicodeErrorTolerance {
     val inreader = new InputStreamReader(input, decoder)
     val cb = new StringBuffer;
     for (i <- 0 to 255) cb.appendCodePoint(inreader.read())
-    val act = Misc.remapControlsAndLineEndingsToVisibleGlyphs(
+    val act = Misc.remapStringToVisibleGlyphs(
       XMLUtils.remapXMLIllegalCharactersToPUA(cb.toString()),
     )
     //
diff --git 
a/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala 
b/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
index fd26368d4..0499c5c29 100644
--- a/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
+++ b/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
@@ -1981,13 +1981,13 @@ object VerifyTestCase {
     lazy val actual8859String =
       
StandardCharsets.ISO_8859_1.newDecoder().decode(ByteBuffer.wrap(actualBytes)).toString()
     lazy val displayableActual =
-      Misc.remapControlsAndLineEndingsToVisibleGlyphs(actual8859String)
+      Misc.remapStringToVisibleGlyphs(actual8859String)
 
     val expectedBytes = IOUtils.toByteArray(expectedData)
     lazy val expected8859String =
       
StandardCharsets.ISO_8859_1.newDecoder().decode(ByteBuffer.wrap(expectedBytes)).toString()
     lazy val displayableExpected =
-      Misc.remapControlsAndLineEndingsToVisibleGlyphs(expected8859String)
+      Misc.remapStringToVisibleGlyphs(expected8859String)
 
     lazy val expectedAndActualDisplayStrings = "\n" +
       "Excected data (as iso8859-1): " + displayableExpected + "\n" +
diff --git 
a/daffodil-test/src/test/resources/org/apache/daffodil/section12/delimiter_properties/DelimiterProperties.tdml
 
b/daffodil-test/src/test/resources/org/apache/daffodil/section12/delimiter_properties/DelimiterProperties.tdml
index 471126389..ba2391d69 100644
--- 
a/daffodil-test/src/test/resources/org/apache/daffodil/section12/delimiter_properties/DelimiterProperties.tdml
+++ 
b/daffodil-test/src/test/resources/org/apache/daffodil/section12/delimiter_properties/DelimiterProperties.tdml
@@ -742,9 +742,12 @@
     model="ReqFieldMissingAndSepIsPrefixOfTerminator">
     <tdml:document><![CDATA[$A$$]]></tdml:document>
     <tdml:errors>
-    <tdml:error>Parse Error</tdml:error>
-    <tdml:error>Found out of scope delimiter</tdml:error>
-    <tdml:error>$$</tdml:error>
+      <tdml:error>Parse Error</tdml:error>
+      <tdml:error>prefix separator</tdml:error>
+      <tdml:error>Found enclosing delimiter: '$$'</tdml:error>
+      <tdml:error>during scan for local delimiter(s): '$'</tdml:error>
+      <tdml:error>Separator '$' from</tdml:error>
+      <tdml:error>ex:E1</tdml:error>
     </tdml:errors>
   </tdml:parserTestCase>
   
@@ -752,9 +755,12 @@
     model="ReqFieldMissingAndSepIsPrefixOfTerminator">
     <tdml:document><![CDATA[A$$B$$]]></tdml:document>
     <tdml:errors>
-    <tdml:error>Parse Error</tdml:error>
-    <tdml:error>Found out of scope delimiter</tdml:error>
-    <tdml:error>$$</tdml:error>
+      <tdml:error>Parse Error</tdml:error>
+      <tdml:error>infix separator</tdml:error>
+      <tdml:error>Found enclosing delimiter: '$$'</tdml:error>
+      <tdml:error>during scan for local delimiter(s): '$'</tdml:error>
+      <tdml:error>Separator '$' from</tdml:error>
+      <tdml:error>ex:E2</tdml:error>
     </tdml:errors>
   </tdml:parserTestCase>
   
@@ -762,8 +768,12 @@
     model="ReqFieldMissingAndSepIsPrefixOfTerminator">
     <tdml:document><![CDATA[A$$B$$]]></tdml:document>
     <tdml:errors>
-    <tdml:error>Found out of scope delimiter</tdml:error>
-    <tdml:error>$$</tdml:error>
+      <tdml:error>Parse Error</tdml:error>
+      <tdml:error>postfix separator</tdml:error>
+      <tdml:error>Found enclosing delimiter: '$$'</tdml:error>
+      <tdml:error>during scan for local delimiter(s): '$'</tdml:error>
+      <tdml:error>Separator '$' from</tdml:error>
+      <tdml:error>ex:E3</tdml:error>
     </tdml:errors>
   </tdml:parserTestCase>
   
diff --git 
a/daffodil-test/src/test/resources/org/apache/daffodil/section14/sequence_groups/SequenceGroupDelimiters.tdml
 
b/daffodil-test/src/test/resources/org/apache/daffodil/section14/sequence_groups/SequenceGroupDelimiters.tdml
index 190438d2b..d3f10849f 100644
--- 
a/daffodil-test/src/test/resources/org/apache/daffodil/section14/sequence_groups/SequenceGroupDelimiters.tdml
+++ 
b/daffodil-test/src/test/resources/org/apache/daffodil/section14/sequence_groups/SequenceGroupDelimiters.tdml
@@ -588,7 +588,7 @@
 
     <tdml:errors>
       <tdml:error>Parse Error</tdml:error>
-      <tdml:error>Failed to parse infix separator</tdml:error>
+      <tdml:error>Failed to find infix separator</tdml:error>
       <tdml:error>Separator ';' not found</tdml:error>
     </tdml:errors>
 

Reply via email to