This is an automated email from the ASF dual-hosted git repository.

ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-io.git


The following commit(s) were added to refs/heads/master by this push:
     new e73aa1d6 [IO-781] Make CharSequenceInputStream.available() more correct in the face of multibyte encodings (#525)
e73aa1d6 is described below

commit e73aa1d6c52b3bd98af679af50a15c0803bf60e6
Author: Elliotte Rusty Harold <[email protected]>
AuthorDate: Sun Dec 17 16:19:27 2023 -0500

    [IO-781] Make CharSequenceInputStream.available() more correct in the face of multibyte encodings (#525)
    
    * Make available() more correct in the face of multibyte encodings
    
    * prefill byteBuf
    
    * detab
    
    * detab
    
    * checkstyle
    
    * checkstyle
    
    * detab
---
 .../commons/io/input/CharSequenceInputStream.java      | 18 +++++++++++-------
 .../commons/io/input/CharSequenceInputStreamTest.java  | 11 +++++++++++
 2 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/src/main/java/org/apache/commons/io/input/CharSequenceInputStream.java b/src/main/java/org/apache/commons/io/input/CharSequenceInputStream.java
index 9860ada2..e5680fb0 100644
--- a/src/main/java/org/apache/commons/io/input/CharSequenceInputStream.java
+++ b/src/main/java/org/apache/commons/io/input/CharSequenceInputStream.java
@@ -180,6 +180,14 @@ public class CharSequenceInputStream extends InputStream {
         this.cBuf = CharBuffer.wrap(cs);
         this.cBufMark = NO_MARK;
         this.bBufMark = NO_MARK;
+        try {
+            fillBuffer();
+        } catch (CharacterCodingException ex) {
+            // Reset everything without filling the buffer
+            // so the same exception can be thrown again later.
+            this.bBuf.clear();
+            this.cBuf.rewind();
+        }
     }
 
     /**
@@ -210,18 +218,14 @@ public class CharSequenceInputStream extends InputStream {
     }
 
     /**
-     * Return an estimate of the number of bytes remaining in the byte stream.
-     * @return the count of bytes that can be read without blocking (or returning EOF).
+     * Return a lower bound on the number of bytes remaining in the byte stream.
      *
+     * @return the count of bytes that can be read without blocking (or returning EOF).
      * @throws IOException if an error occurs (probably not possible).
      */
     @Override
     public int available() throws IOException {
-        // The cached entries are in bBuf; since encoding always creates at least one byte
-        // per character, we can add the two to get a better estimate (e.g. if bBuf is empty)
-        // Note that the implementation in 2.4 could return zero even though there were
-        // encoded bytes still available.
-        return this.bBuf.remaining() + this.cBuf.remaining();
+        return this.bBuf.remaining();
     }
 
     @Override
diff --git a/src/test/java/org/apache/commons/io/input/CharSequenceInputStreamTest.java b/src/test/java/org/apache/commons/io/input/CharSequenceInputStreamTest.java
index 6c98827b..5c309e59 100644
--- a/src/test/java/org/apache/commons/io/input/CharSequenceInputStreamTest.java
+++ b/src/test/java/org/apache/commons/io/input/CharSequenceInputStreamTest.java
@@ -511,4 +511,15 @@ public class CharSequenceInputStreamTest {
             assertEquals(-1, r.read(), csName);
         }
     }
+
+    @Test
+    // IO-781 available() returns 2 but only 1 byte is read afterwards
+    public void testAvailable() throws IOException {
+        final Charset charset = Charset.forName("Big5");
+        final CharSequenceInputStream in = new CharSequenceInputStream("\uD800\uDC00", charset);
+        final int available = in.available();
+        final byte[] data = new byte[available];
+        final int bytesRead = in.read(data);
+        assertEquals(available, bytesRead);
+    }
 }

Reply via email to