Hi all,
I'd like to report something that looks like a bug in the version of Xerces
included in JRE 7u71/7u72/8u20/8u25.
The StAX API seems to produce corrupted data, depending on how many bytes the
underlying InputStream actually returns on each invocation of
read(byte[], int, int).
The following repro case will lead to different results depending on the
version of the JRE. Am I doing something wrong?
Thanks,
Victor
------
import java.io.ByteArrayInputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamReader;
/*
* Correct output (7u67,8u11)
* rugs
*
* Incorrect output (7u71,7u72,8u20,8u25)
* bugs
*/
/**
 * Stand-alone reproduction for a StAX parsing problem: a small XML document is
 * fed to an {@link XMLStreamReader} through a stream that returns only a few
 * bytes per bulk read, and the value of the {@code likes} attribute of the
 * first element is printed.
 *
 * <p>The document declares {@code likes="rugs"}, so {@code rugs} is the correct
 * output; any other value means the parser corrupted the attribute.
 */
public class XmlReaderBug {

    /** Maximum number of bytes handed back by each bulk read of the throttled stream. */
    private static final int BYTES_PER_READ = 6;

    /** The document under test; the attribute of interest is {@code likes}. */
    private static final String XML =
            "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
            + "<He likes=\"rugs\" because=\"they really tie the room together\"/>";

    /**
     * Parses {@link #XML} through a throttled stream and prints the value of the
     * {@code likes} attribute of the first element to standard output.
     *
     * @param args ignored
     * @throws Exception if the XML reader cannot be created, advanced or closed
     */
    public static void main(String[] args) throws Exception {
        final InputStream xmlStream =
                new ByteArrayInputStream(XML.getBytes(Charset.forName("UTF-8")));
        final InputStream throttledXmlStream =
                new ThrottledInputStream(xmlStream, BYTES_PER_READ);
        final XMLInputFactory xmlFactory = XMLInputFactory.newInstance();
        final XMLStreamReader xmlStreamReader =
                xmlFactory.createXMLStreamReader(throttledXmlStream);
        try {
            // Advance from START_DOCUMENT to the first element.
            xmlStreamReader.next();
            // bugs or rugs?
            System.out.println(xmlStreamReader.getAttributeValue(null, "likes"));
        } finally {
            // XMLStreamReader is not AutoCloseable, so release it explicitly.
            xmlStreamReader.close();
        }
    }

    /**
     * An InputStream implementation that limits the number of bytes read by
     * {@code read(byte[], int, int)}.
     */
    private static class ThrottledInputStream extends FilterInputStream {

        /** Upper bound on the number of bytes returned by one bulk read. */
        private final int bytesPerRead;

        /**
         * @param stream       the stream to read from
         * @param bytesPerRead maximum number of bytes returned per bulk read; must be positive
         * @throws IllegalArgumentException if {@code bytesPerRead} is not positive
         */
        public ThrottledInputStream(InputStream stream, int bytesPerRead) {
            super(stream);
            if (bytesPerRead <= 0) {
                // A non-positive quota would make read() report a false end of
                // stream (0) or pass a negative length to the delegate (< 0).
                throw new IllegalArgumentException(
                        "bytesPerRead must be positive: " + bytesPerRead);
            }
            this.bytesPerRead = bytesPerRead;
        }

        /**
         * Reads at most {@code min(bytesPerRead, len)} bytes into {@code b}.
         *
         * @return the number of bytes read, {@code 0} if {@code len} is zero, or
         *         {@code -1} if the delegate supplied no bytes
         * @throws IndexOutOfBoundsException if {@code off} or {@code len} is negative,
         *         or {@code len} exceeds {@code b.length - off}
         * @throws IOException if the delegate fails
         */
        @Override
        public int read(byte[] b, int off, int len) throws IOException {
            if (off < 0 || len < 0 || len > b.length - off) {
                throw new IndexOutOfBoundsException();
            }
            if (len == 0) {
                return 0;
            }

            // Limit bytes read
            int remaining = Math.min(bytesPerRead, len);

            // Ensure deterministic behavior (similar to org.apache.commons.io.IOUtils.read).
            // Useless for this test case, but convenient for consistently reproducing
            // the bug with other stream implementations: keep reading until the quota
            // is filled or the delegate stops supplying bytes.
            int totalBytesRead = 0;
            while (remaining > 0) {
                final int bytesRead = in.read(b, off + totalBytesRead, remaining);
                if (bytesRead <= 0) {
                    break;
                }
                remaining -= bytesRead;
                totalBytesRead += bytesRead;
            }

            // No more bytes
            return totalBytesRead == 0 ? -1 : totalBytesRead;
        }
    }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]