This is an automated email from the ASF dual-hosted git repository.
ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-io.git
The following commit(s) were added to refs/heads/master by this push:
new e73aa1d6 [IO-781] Make CharSequenceInputStream.available() more
correct in the face of multibyte encodings (#525)
e73aa1d6 is described below
commit e73aa1d6c52b3bd98af679af50a15c0803bf60e6
Author: Elliotte Rusty Harold <[email protected]>
AuthorDate: Sun Dec 17 16:19:27 2023 -0500
[IO-781] Make CharSequenceInputStream.available() more correct in the face
of multibyte encodings (#525)
* Make available() more correct in the face of multibyte encodings
* prefill byteBuf
* detab
* detab
* checkstyle
* checkstyle
* detab
---
.../commons/io/input/CharSequenceInputStream.java | 18 +++++++++++-------
.../commons/io/input/CharSequenceInputStreamTest.java | 11 +++++++++++
2 files changed, 22 insertions(+), 7 deletions(-)
diff --git a/src/main/java/org/apache/commons/io/input/CharSequenceInputStream.java b/src/main/java/org/apache/commons/io/input/CharSequenceInputStream.java
index 9860ada2..e5680fb0 100644
--- a/src/main/java/org/apache/commons/io/input/CharSequenceInputStream.java
+++ b/src/main/java/org/apache/commons/io/input/CharSequenceInputStream.java
@@ -180,6 +180,14 @@ public class CharSequenceInputStream extends InputStream {
this.cBuf = CharBuffer.wrap(cs);
this.cBufMark = NO_MARK;
this.bBufMark = NO_MARK;
+ try {
+ fillBuffer();
+ } catch (CharacterCodingException ex) {
+ // Reset everything without filling the buffer
+ // so the same exception can be thrown again later.
+ this.bBuf.clear();
+ this.cBuf.rewind();
+ }
}
/**
@@ -210,18 +218,14 @@ public class CharSequenceInputStream extends InputStream {
}
/**
- * Return an estimate of the number of bytes remaining in the byte stream.
- * @return the count of bytes that can be read without blocking (or returning EOF).
+ * Return a lower bound on the number of bytes remaining in the byte stream.
*
+ * @return the count of bytes that can be read without blocking (or returning EOF).
* @throws IOException if an error occurs (probably not possible).
*/
@Override
public int available() throws IOException {
- // The cached entries are in bBuf; since encoding always creates at least one byte
- // per character, we can add the two to get a better estimate (e.g. if bBuf is empty)
- // Note that the implementation in 2.4 could return zero even though there were
- // encoded bytes still available.
- return this.bBuf.remaining() + this.cBuf.remaining();
+ return this.bBuf.remaining();
}
@Override
diff --git a/src/test/java/org/apache/commons/io/input/CharSequenceInputStreamTest.java b/src/test/java/org/apache/commons/io/input/CharSequenceInputStreamTest.java
index 6c98827b..5c309e59 100644
--- a/src/test/java/org/apache/commons/io/input/CharSequenceInputStreamTest.java
+++ b/src/test/java/org/apache/commons/io/input/CharSequenceInputStreamTest.java
@@ -511,4 +511,15 @@ public class CharSequenceInputStreamTest {
assertEquals(-1, r.read(), csName);
}
}
+
+ @Test
+ // IO-781 available() returns 2 but only 1 byte is read afterwards
+ public void testAvailable() throws IOException {
+ final Charset charset = Charset.forName("Big5");
+ final CharSequenceInputStream in = new CharSequenceInputStream("\uD800\uDC00", charset);
+ final int available = in.available();
+ final byte[] data = new byte[available];
+ final int bytesRead = in.read(data);
+ assertEquals(available, bytesRead);
+ }
}