This is an automated email from the ASF dual-hosted git repository.

cnauroth pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.4 by this push:
     new c7d631ec56f HADOOP-19400: Expand specification and contract test coverage for InputStream reads.
c7d631ec56f is described below

commit c7d631ec56fe8c9749cf2f2ca537ba3e28a0015a
Author: Chris Nauroth <cnaur...@apache.org>
AuthorDate: Tue Mar 11 16:39:43 2025 +0000

    HADOOP-19400: Expand specification and contract test coverage for InputStream reads.

    Closes #7367

    Signed-off-by: Anuj Modi <anujm...@apache.org>
    Signed-off-by: Steve Loughran <ste...@apache.org>
    (cherry picked from commit 32da0d6d1449a5fa1a7f9dfe0f0681d7c2dbfc25)
---
 .../site/markdown/filesystem/fsdatainputstream.md  |  22 +++-
 .../fs/contract/AbstractContractOpenTest.java      | 136 +++++++++++++++++++++
 .../hadoop/fs/contract/ContractTestUtils.java      |  53 ++++++++
 3 files changed, 207 insertions(+), 4 deletions(-)

diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstream.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstream.md
index e671cb6fa7c..92ef696db7a 100644
--- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstream.md
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstream.md
@@ -129,10 +129,11 @@ as implicitly set in `pos`.
 #### Preconditions
 
     isOpen(FSDIS)
-    buffer != null else raise NullPointerException
-    length >= 0
-    offset < len(buffer)
-    length <= len(buffer) - offset
+    buffer != null else raise NullPointerException, IllegalArgumentException
+    offset >= 0 else raise IndexOutOfBoundsException
+    length >= 0 else raise IndexOutOfBoundsException, IllegalArgumentException
+    offset < len(buffer) else raise IndexOutOfBoundsException
+    length <= len(buffer) - offset else raise IndexOutOfBoundsException
     pos >= 0 else raise EOFException, IOException
 
 Exceptions that may be raised on precondition failure are
@@ -175,6 +176,19 @@ must block until at least one byte is returned.
 Thus, for any data source of length greater than zero, repeated invocations
 of this `read()` operation will eventually read all the data.
 
+#### Implementation Notes
+
+1. If the caller passes a `null` buffer, then an unchecked exception MUST be thrown. The base JDK
+`InputStream` implementation throws `NullPointerException`. HDFS historically used
+`IllegalArgumentException`. Implementations MAY use either of these.
+1. If the caller passes a negative value for `length`, then an unchecked exception MUST be thrown.
+The base JDK `InputStream` implementation throws `IndexOutOfBoundsException`. HDFS historically used
+`IllegalArgumentException`. Implementations MAY use either of these.
+1. Reads through any method MUST return the same data.
+1. Callers MAY interleave calls to different read methods (single-byte and multi-byte) on the same
+stream. The stream MUST return the same underlying data, regardless of the specific read calls or
+their ordering.
+
 
 ### <a name="Seekable.seek"></a>`Seekable.seek(s)`
 
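To make implementation notes 1 and 2 above concrete, here is a minimal sketch of the JDK-style
argument checks; it is not taken from this patch, and the class and field names are hypothetical.
An implementation following the HDFS convention would raise IllegalArgumentException at the same
points.

    import java.io.EOFException;
    import java.io.IOException;

    // Hypothetical stream; illustrates only the precondition checks in the spec above.
    class SketchInputStream {
      private final byte[] data;  // backing data source
      private int pos;            // current stream position

      SketchInputStream(byte[] data) {
        this.data = data;
      }

      public int read(byte[] buffer, int offset, int length) throws IOException {
        if (buffer == null) {
          // Note 1: NullPointerException (JDK style) or IllegalArgumentException (HDFS style).
          throw new NullPointerException("buffer is null");
        }
        if (offset < 0 || length < 0 || length > buffer.length - offset) {
          // Note 2: IndexOutOfBoundsException (JDK style) or IllegalArgumentException (HDFS style).
          throw new IndexOutOfBoundsException(
              "offset=" + offset + " length=" + length + " buffer length=" + buffer.length);
        }
        if (pos < 0) {
          throw new EOFException("negative stream position " + pos);
        }
        if (length == 0) {
          return 0;   // a zero-length read is a no-op
        }
        if (pos >= data.length) {
          return -1;  // EOF
        }
        int n = Math.min(length, data.length - pos);
        System.arraycopy(data, pos, buffer, offset, n);
        pos += n;
        return n;
      }
    }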
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractOpenTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractOpenTest.java
index 3598d33680e..bd920fd03b0 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractOpenTest.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractOpenTest.java
@@ -38,6 +38,9 @@
 import static org.apache.hadoop.fs.contract.ContractTestUtils.compareByteArrays;
 import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile;
 import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
+import static org.apache.hadoop.fs.contract.ContractTestUtils.readDataset;
+import static org.apache.hadoop.fs.contract.ContractTestUtils.readDatasetSingleByteReads;
+import static org.apache.hadoop.fs.contract.ContractTestUtils.readNBytes;
 import static org.apache.hadoop.fs.contract.ContractTestUtils.touch;
 import static org.apache.hadoop.test.LambdaTestUtils.intercept;
 import static org.apache.hadoop.test.LambdaTestUtils.interceptFuture;
@@ -408,4 +411,137 @@ public void testFloatingPointLength() throws Throwable {
         .isEqualTo(len);
   }
 
+  @Test
+  public void testInputStreamReadNullBuffer() throws Throwable {
+    // The JDK base InputStream (and by extension LocalFSFileInputStream) throws
+    // NullPointerException. Historically, DFSInputStream has thrown IllegalArgumentException
+    // instead. Allow either behavior.
+    describe("Attempting to read into a null buffer should throw IllegalArgumentException or "
+        + "NullPointerException");
+    Path path = methodPath();
+    FileSystem fs = getFileSystem();
+    int len = 4096;
+    createFile(fs, path, true,
+        dataset(len, 0x40, 0x80));
+    try (FSDataInputStream is = fs.openFile(path).build().get()) {
+      Assertions.assertThatThrownBy(() -> is.read(null, 0, 10))
+          .isInstanceOfAny(IllegalArgumentException.class, NullPointerException.class);
+    }
+  }
+
+  @Test
+  public void testInputStreamReadNegativePosition() throws Throwable {
+    describe("Attempting to read into a negative position should throw IndexOutOfBoundsException");
+    Path path = methodPath();
+    FileSystem fs = getFileSystem();
+    int len = 4096;
+    createFile(fs, path, true,
+        dataset(len, 0x40, 0x80));
+    try (FSDataInputStream is = fs.openFile(path).build().get()) {
+      Assertions.assertThatThrownBy(() -> is.read(new byte[10], -1, 10))
+          .isInstanceOf(IndexOutOfBoundsException.class);
+    }
+  }
+
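The "allow either behavior" comments in the two tests above, and in the negative-length test that
follows, reflect the baseline JDK contract, which can be confirmed against a plain
java.io.InputStream. A standalone check, not part of the patch (ByteArrayInputStream is used here
because its argument checks match the base class contract):

    import java.io.ByteArrayInputStream;
    import java.io.InputStream;

    public class JdkReadContractCheck {
      public static void main(String[] args) throws Exception {
        InputStream in = new ByteArrayInputStream(new byte[16]);
        try {
          in.read(null, 0, 10);           // JDK contract: NullPointerException on a null buffer
        } catch (NullPointerException e) {
          System.out.println("null buffer -> NullPointerException");
        }
        try {
          in.read(new byte[10], -1, 10);  // JDK contract: IndexOutOfBoundsException
        } catch (IndexOutOfBoundsException e) {
          System.out.println("negative offset -> IndexOutOfBoundsException");
        }
      }
    }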
+  @Test
+  public void testInputStreamReadNegativeLength() throws Throwable {
+    // The JDK base InputStream (and by extension LocalFSFileInputStream) throws
+    // IndexOutOfBoundsException. Historically, DFSInputStream has thrown IllegalArgumentException
+    // instead. Allow either behavior.
+    describe("Attempting a read with a negative length should throw IllegalArgumentException or "
+        + "IndexOutOfBoundsException");
+    Path path = methodPath();
+    FileSystem fs = getFileSystem();
+    int len = 4096;
+    createFile(fs, path, true,
+        dataset(len, 0x40, 0x80));
+    try (FSDataInputStream is = fs.openFile(path).build().get()) {
+      Assertions.assertThatThrownBy(() -> is.read(new byte[10], 0, -1))
+          .isInstanceOfAny(IllegalArgumentException.class, IndexOutOfBoundsException.class);
+    }
+  }
+
+  @Test
+  public void testInputStreamReadTooLong() throws Throwable {
+    describe("Attempting a read longer than the buffer should throw IndexOutOfBoundsException");
+    Path path = methodPath();
+    FileSystem fs = getFileSystem();
+    int len = 4096;
+    createFile(fs, path, true,
+        dataset(len, 0x40, 0x80));
+    try (FSDataInputStream is = fs.openFile(path).build().get()) {
+      Assertions.assertThatThrownBy(() -> is.read(new byte[10], 0, 11))
+          .isInstanceOf(IndexOutOfBoundsException.class);
+    }
+  }
+
+  @Test
+  public void testInputStreamReadZeroLengthRead() throws Throwable {
+    describe("Reading 0 bytes is a no-op");
+    Path path = methodPath();
+    FileSystem fs = getFileSystem();
+    int len = 4096;
+    createFile(fs, path, true,
+        dataset(len, 0x40, 0x80));
+    try (FSDataInputStream is = fs.openFile(path).build().get()) {
+      Assertions.assertThat(is.read(new byte[10], 0, 0)).describedAs("bytes read").isEqualTo(0);
+    }
+  }
+
+  @Test
+  public void testInputStreamConsistentEOF() throws Throwable {
+    describe("Both single-byte and multi-byte read should report EOF after consuming stream");
+    Path path = methodPath();
+    FileSystem fs = getFileSystem();
+    int len = 4096;
+    createFile(fs, path, true,
+        dataset(len, 0x40, 0x80));
+    try (FSDataInputStream is = fs.openFile(path).build().get()) {
+      IOUtils.skipFully(is, len);
+      Assertions.assertThat(is.read()).describedAs("single byte EOF").isEqualTo(-1);
+      Assertions.assertThat(is.read(new byte[10], 0, 10)).describedAs("multi byte EOF")
+          .isEqualTo(-1);
+    }
+  }
+
+  @Test
+  public void testInputStreamSingleAndMultiByteReadsEqual() throws Throwable {
+    describe("Single-byte and multi-byte read should return the same bytes");
+    Path path = methodPath();
+    FileSystem fs = getFileSystem();
+    int len = 4096;
+    createFile(fs, path, true,
+        dataset(len, 0x40, 0x80));
+    byte[] multiByteReads = readDataset(fs, path, len);
+    byte[] singleByteReads = readDatasetSingleByteReads(fs, path, len);
+    compareByteArrays(multiByteReads, singleByteReads, len);
+  }
+
+  @Test
+  public void testInputStreamMixedSingleAndMultiByteReadsEqual() throws Throwable {
+    describe("Mixed single and multi-byte reads on the same stream should return the same bytes");
+    Path path = methodPath();
+    FileSystem fs = getFileSystem();
+    int len = 4096;
+    createFile(fs, path, true,
+        dataset(len, 0x40, 0x80));
+    byte[] expected = readDataset(fs, path, len);
+    byte[] actual = new byte[len];
+    int readSize = 128;
+    try (FSDataInputStream is = fs.openFile(path).build().get()) {
+      for (int offset = 0; offset < len; offset = offset + readSize + readSize) {
+        if (readNBytes(is, actual, offset, readSize) != readSize) {
+          fail("End of file reached before reading fully.");
+        }
+        for (int i = 0; i < readSize; ++i) {
+          int nextByte = is.read();
+          if (-1 == nextByte) {
+            fail("End of file reached before reading fully.");
+          }
+          actual[offset + readSize + i] = (byte)nextByte;
+        }
+      }
+    }
+    compareByteArrays(expected, actual, len);
+  }
 }

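The mixed-reads test above relies on readNBytes, added to ContractTestUtils in the diff below,
because a single InputStream.read(byte[], int, int) call may legitimately return fewer bytes than
requested. A small usage sketch of the same looping pattern; the file path here is hypothetical
and this is not part of the patch:

    import java.io.FileInputStream;
    import java.io.IOException;
    import java.io.InputStream;

    public class ReadFullyDemo {
      public static void main(String[] args) throws IOException {
        // Hypothetical local file; any InputStream behaves the same way.
        try (InputStream in = new FileInputStream("/tmp/sample.bin")) {
          byte[] buf = new byte[4096];
          int total = 0;
          // One read() may return fewer bytes than asked for, so loop until the
          // buffer is full or EOF, as ContractTestUtils.readNBytes does.
          while (total < buf.length) {
            int n = in.read(buf, total, buf.length - total);
            if (n < 0) {
              break;  // EOF before the buffer filled
            }
            total += n;
          }
          System.out.println("read " + total + " bytes");
        }
      }
    }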
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractTestUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractTestUtils.java
index 739640aa34b..071fbcf8e00 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractTestUtils.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractTestUtils.java
@@ -219,6 +219,8 @@ public static void writeDataset(FileSystem fs, Path path, byte[] src,
    * Read the file and convert to a byte dataset.
    * This implements readfully internally, so that it will read
    * in the file without ever having to seek()
+   * The read is performed through a sequence of calls to multi-byte
+   * {@link InputStream#read(byte[], int, int)}.
    * @param fs filesystem
    * @param path path to read from
    * @param len length of data to read
@@ -242,6 +244,57 @@ public static byte[] readDataset(FileSystem fs, Path path, int len)
     return dest;
   }
 
+  /**
+   * Read a file completely and return the contents in a byte buffer. The read is performed through
+   * repeated calls to single-byte {@link InputStream#read()}.
+   * @param fs filesystem
+   * @param path path to read from
+   * @param len length of data to read
+   * @return the bytes
+   * @throws IOException IO problems
+   */
+  public static byte[] readDatasetSingleByteReads(FileSystem fs, Path path, int len)
+      throws IOException {
+    byte[] dest = new byte[len];
+    try (FSDataInputStream in = fs.open(path)) {
+      for (int i = 0; i < len; ++i) {
+        int nextByte = in.read();
+        if (-1 == nextByte) {
+          throw new EOFException("End of file reached before reading fully.");
+        }
+        dest[i] = (byte)nextByte;
+      }
+    }
+    return dest;
+  }
+
+  /**
+   * Reads {@code len} bytes into offset {@code off} of target buffer {@code b}, up to EOF,
+   * potentially blocking until enough bytes are available on the stream. This is similar to
+   * {@code InputStream#readNBytes(int)} introduced in Java 11, except the caller owns allocation of
+   * the target buffer.
+   * @param is input stream to read
+   * @param b target buffer
+   * @param off offset within {@code b}
+   * @param len length of data to read
+   * @return number of bytes read
+   * @throws IOException IO problems
+   */
+  public static int readNBytes(InputStream is, byte[] b, int off, int len)
+      throws IOException {
+    int totalBytes = 0;
+    while (len > 0) {
+      int nbytes = is.read(b, off, len);
+      if (nbytes < 0) {
+        break;
+      }
+      totalBytes += nbytes;
+      off += nbytes;
+      len -= nbytes;
+    }
+    return totalBytes;
+  }
+
   /**
    * Read a file, verify its length and contents match the expected array.
    * @param fs filesystem

---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org