This is an automated email from the ASF dual-hosted git repository.

slawrence pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git


The following commit(s) were added to refs/heads/main by this push:
     new a79f185de Only mmap regular files in the CLI
a79f185de is described below

commit a79f185dea7342b53b31260c2b13c50458e6dbc2
Author: Steve Lawrence <[email protected]>
AuthorDate: Mon Jun 9 07:22:19 2025 -0400

    Only mmap regular files in the CLI
    
    When an input file for the CLI is not stdin, it currently maps the file
    to a MappedByteBuffer, which gives significant performance gains,
    especially for large files. However, for non-regular files like fifo
    files, devices, and sockets, the map behavior is undefined. In practice,
    we get a byte buffer but the result is a buffer with zero bytes, leading
    to the CLI parse/unparse seeing no data.
    
    To fix this, the CLI now checks if a file is regular, and only if it is
    regular will it consider mapping the file. Non-regular files use the
    existing fallback streaming behavior.
    
    DAFFODIL-3002
---
 .../main/scala/org/apache/daffodil/cli/Main.scala  | 28 +++++++++++++---------
 .../daffodil/cli/cliTest/TestCLIParsing.scala      | 27 +++++++++++++++++++++
 .../org/apache/daffodil/cli/cliTest/Util.scala     |  2 +-
 3 files changed, 45 insertions(+), 12 deletions(-)

diff --git a/daffodil-cli/src/main/scala/org/apache/daffodil/cli/Main.scala 
b/daffodil-cli/src/main/scala/org/apache/daffodil/cli/Main.scala
index 37cc20d8f..9620497a0 100644
--- a/daffodil-cli/src/main/scala/org/apache/daffodil/cli/Main.scala
+++ b/daffodil-cli/src/main/scala/org/apache/daffodil/cli/Main.scala
@@ -1272,18 +1272,24 @@ class Main(
             val input = parseOpts.infile.toOption match {
               case Some("-") | None => InputSourceDataInputStream(STDIN)
               case Some(file) => {
-                // for files <= 2GB, use a mapped byte buffer to avoid the 
overhead related to
-                // the BucketingInputSource. Larger files cannot be mapped so 
we cannot avoid it
+                // Try to use a memory mapped byte buffer for input files 
since it is
+                // significantly more efficient, especially for large files. 
Files larger than
+                // 2GB and non-regular files (e.g. fifo files, devices, unix 
sockets) cannot be
+                // mapped--in these cases we use a normal input stream 
which is less
+                // efficient.
                 val path = Paths.get(file)
-                val size = Files.size(path)
-                if (size <= Int.MaxValue) {
-                  val fc = FileChannel.open(path, StandardOpenOption.READ)
-                  val bb = fc.map(FileChannel.MapMode.READ_ONLY, 0, size)
-                  fc.close() // we no longer need the channel now that we've 
mapped it
-                  InputSourceDataInputStream(bb)
-                } else {
-                  val is = Files.newInputStream(path, StandardOpenOption.READ)
-                  InputSourceDataInputStream(is)
+                val optSize = if (Files.isRegularFile(path)) 
Some(Files.size(path)) else None
+                optSize match {
+                  case Some(size) if size <= Int.MaxValue => {
+                    val fc = FileChannel.open(path, StandardOpenOption.READ)
+                    val bb = fc.map(FileChannel.MapMode.READ_ONLY, 0, size)
+                    fc.close() // we no longer need the channel now that we've 
mapped it
+                    InputSourceDataInputStream(bb)
+                  }
+                  case _ => {
+                    val is = Files.newInputStream(path, 
StandardOpenOption.READ)
+                    InputSourceDataInputStream(is)
+                  }
                 }
               }
             }
diff --git 
a/daffodil-cli/src/test/scala/org/apache/daffodil/cli/cliTest/TestCLIParsing.scala
 
b/daffodil-cli/src/test/scala/org/apache/daffodil/cli/cliTest/TestCLIParsing.scala
index 052b735d7..8dad007e8 100644
--- 
a/daffodil-cli/src/test/scala/org/apache/daffodil/cli/cliTest/TestCLIParsing.scala
+++ 
b/daffodil-cli/src/test/scala/org/apache/daffodil/cli/cliTest/TestCLIParsing.scala
@@ -17,7 +17,10 @@
 
 package org.apache.daffodil.cli.cliTest
 
+import java.io.FileOutputStream
 import java.nio.charset.StandardCharsets.UTF_8
+import scala.sys.process.Process
+import scala.util.Using
 
 import org.apache.daffodil.cli.Main.ExitCode
 import org.apache.daffodil.cli.cliTest.Util._
@@ -25,6 +28,7 @@ import org.apache.daffodil.lib.Implicits._
 
 import org.apache.commons.io.FileUtils
 import org.junit.Assert._
+import org.junit.Assume.assumeTrue
 import org.junit.Test
 
 class TestCLIParsing {
@@ -304,6 +308,29 @@ class TestCLIParsing {
     }(ExitCode.LeftOverData)
   }
 
+  @Test def test_CLI_Parsing_SimpleParse_fifo(): Unit = {
+    // disable this test on windows since it requires the mkfifo command
+    assumeTrue("fifo test ignored on Windows", !isWindows)
+
+    val schema = path(
+      
"daffodil-test/src/test/resources/org/apache/daffodil/section06/entities/charClassEntities.dfdl.xsd"
+    )
+
+    withTempDir { tempDir =>
+      val fifo = s"$tempDir/fifo"
+      Process("mkfifo", Seq(fifo)).!!
+
+      runCLI(args"parse -s $schema -r matrix $fifo") { cli =>
+        // Write to the fifo file. Calling Using.resource will close the fifo 
file when writing
+        // is complete and trigger an EOF in the CLI to end the parse
+        Using.resource(new FileOutputStream(fifo)) { os =>
+          os.write("0,1,2,3".getBytes("UTF-8"))
+        }
+        cli.expect("<tns:cell>3</tns:cell>")
+      }(ExitCode.Success)
+    }
+  }
+
   @Test def test_CLI_Parsing_SimpleParse_verboseMode(): Unit = {
     val schema = path(
       
"daffodil-test/src/test/resources/org/apache/daffodil/section06/entities/charClassEntities.dfdl.xsd"
diff --git 
a/daffodil-cli/src/test/scala/org/apache/daffodil/cli/cliTest/Util.scala 
b/daffodil-cli/src/test/scala/org/apache/daffodil/cli/cliTest/Util.scala
index dbe0ecf89..821615052 100644
--- a/daffodil-cli/src/test/scala/org/apache/daffodil/cli/cliTest/Util.scala
+++ b/daffodil-cli/src/test/scala/org/apache/daffodil/cli/cliTest/Util.scala
@@ -50,7 +50,7 @@ import org.junit.Assert.assertEquals
 
 object Util {
 
-  private val isWindows = 
System.getProperty("os.name").toLowerCase().startsWith("windows")
+  val isWindows = 
System.getProperty("os.name").toLowerCase().startsWith("windows")
 
   private val daffodilBinPath = {
     val ext = if (isWindows) ".bat" else ""

Reply via email to