Author: niallp Date: Thu Dec 23 03:12:09 2010 New Revision: 1052161 URL: http://svn.apache.org/viewvc?rev=1052161&view=rev Log: IO-258 - Fix XmlStreamReader consuming the stream during encoding detection
Modified: commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/XmlStreamReader.java commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java Modified: commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/XmlStreamReader.java URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/XmlStreamReader.java?rev=1052161&r1=1052160&r2=1052161&view=diff ============================================================================== --- commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/XmlStreamReader.java (original) +++ commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/XmlStreamReader.java Thu Dec 23 03:12:09 2010 @@ -201,8 +201,10 @@ public class XmlStreamReader extends Rea */ public XmlStreamReader(InputStream is, boolean lenient, String defaultEncoding) throws IOException { this.defaultEncoding = defaultEncoding; - this.encoding = doRawStream(is, lenient); - this.reader = new InputStreamReader(is, encoding); + BOMInputStream bom = new BOMInputStream(new BufferedInputStream(is, BUFFER_SIZE), false, BOMS); + BOMInputStream pis = new BOMInputStream(bom, true, XML_GUESS_BYTES); + this.encoding = doRawStream(bom, pis, lenient); + this.reader = new InputStreamReader(pis, encoding); } /** @@ -250,12 +252,14 @@ public class XmlStreamReader extends Rea boolean lenient = true; String contentType = conn.getContentType(); InputStream is = conn.getInputStream(); + BOMInputStream bom = new BOMInputStream(new BufferedInputStream(is, BUFFER_SIZE), false, BOMS); + BOMInputStream pis = new BOMInputStream(bom, true, XML_GUESS_BYTES); if (conn instanceof HttpURLConnection || contentType != null) { - this.encoding = doHttpStream(is, contentType, lenient); + this.encoding = doHttpStream(bom, pis, contentType, lenient); } else { - this.encoding = doRawStream(is, lenient); + this.encoding = doRawStream(bom, pis, lenient); } - this.reader = new 
InputStreamReader(is, encoding); + this.reader = new InputStreamReader(pis, encoding); } /** @@ -317,8 +321,10 @@ public class XmlStreamReader extends Rea public XmlStreamReader(InputStream is, String httpContentType, boolean lenient, String defaultEncoding) throws IOException { this.defaultEncoding = defaultEncoding; - this.encoding = doHttpStream(is, httpContentType, lenient); - this.reader = new InputStreamReader(is, encoding); + BOMInputStream bom = new BOMInputStream(new BufferedInputStream(is, BUFFER_SIZE), false, BOMS); + BOMInputStream pis = new BOMInputStream(bom, true, XML_GUESS_BYTES); + this.encoding = doHttpStream(bom, pis, httpContentType, lenient); + this.reader = new InputStreamReader(pis, encoding); } /** @@ -394,16 +400,15 @@ public class XmlStreamReader extends Rea /** * Process the raw stream. * - * @param is InputStream to create the reader from. + * @param bom BOMInputStream to detect byte order marks + * @param pis BOMInputStream to guess XML encoding * @param lenient indicates if the charset encoding detection should be * relaxed. * @return the encoding to be used * @throws IOException thrown if there is a problem reading the stream. 
*/ - private String doRawStream(InputStream is, boolean lenient) + private String doRawStream(BOMInputStream bom, BOMInputStream pis, boolean lenient) throws IOException { - BOMInputStream bom = new BOMInputStream(new BufferedInputStream(is, BUFFER_SIZE), false, BOMS); - BOMInputStream pis = new BOMInputStream(bom, true, XML_GUESS_BYTES); String bomEnc = bom.getBOMCharsetName(); String xmlGuessEnc = pis.getBOMCharsetName(); String xmlEnc = getXmlProlog(pis, xmlGuessEnc); @@ -411,7 +416,7 @@ public class XmlStreamReader extends Rea return calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc); } catch (XmlStreamReaderException ex) { if (lenient) { - return doLenientDetection(null, is, ex); + return doLenientDetection(null, ex); } else { throw ex; } @@ -421,17 +426,16 @@ public class XmlStreamReader extends Rea /** * Process a HTTP stream. * - * @param is InputStream to create the reader from. + * @param bom BOMInputStream to detect byte order marks + * @param pis BOMInputStream to guess XML encoding * @param httpContentType The HTTP content type * @param lenient indicates if the charset encoding detection should be * relaxed. * @return the encoding to be used * @throws IOException thrown if there is a problem reading the stream. 
*/ - private String doHttpStream(InputStream is, String httpContentType, + private String doHttpStream(BOMInputStream bom, BOMInputStream pis, String httpContentType, boolean lenient) throws IOException { - BOMInputStream bom = new BOMInputStream(new BufferedInputStream(is, BUFFER_SIZE), false, BOMS); - BOMInputStream pis = new BOMInputStream(bom, true, XML_GUESS_BYTES); String bomEnc = bom.getBOMCharsetName(); String xmlGuessEnc = pis.getBOMCharsetName(); String xmlEnc = getXmlProlog(pis, xmlGuessEnc); @@ -440,7 +444,7 @@ public class XmlStreamReader extends Rea xmlGuessEnc, xmlEnc, lenient); } catch (XmlStreamReaderException ex) { if (lenient) { - return doLenientDetection(httpContentType, is, ex); + return doLenientDetection(httpContentType, ex); } else { throw ex; } @@ -452,12 +456,11 @@ public class XmlStreamReader extends Rea * * @param httpContentType content-type header to use for the resolution of * the charset encoding. - * @param is the unconsumed InputStream * @param ex The thrown exception * @return the encoding * @throws IOException thrown if there is a problem reading the stream. 
*/ - private String doLenientDetection(String httpContentType, InputStream is, + private String doLenientDetection(String httpContentType, XmlStreamReaderException ex) throws IOException { if (httpContentType != null && httpContentType.startsWith("text/html")) { httpContentType = httpContentType.substring("text/html".length()); Modified: commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java URL: http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java?rev=1052161&r1=1052160&r2=1052161&view=diff ============================================================================== --- commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java (original) +++ commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java Thu Dec 23 03:12:09 2010 @@ -26,6 +26,8 @@ import java.text.MessageFormat; import java.util.HashMap; import java.util.Map; +import org.apache.commons.io.IOUtils; + import junit.framework.TestCase; /** @@ -220,6 +222,25 @@ public class XmlStreamReaderTest extends "UTF-8", "UTF-8"); } + + public void testRawContent() throws Exception { + String encoding = "UTF-8"; + String xml = getXML("no-bom", XML3, encoding, encoding); + ByteArrayInputStream is = new ByteArrayInputStream(xml.getBytes(encoding)); + XmlStreamReader xmlReader = new XmlStreamReader(is); + assertEquals("Check encoding", xmlReader.getEncoding(), encoding); + assertEquals("Check content", xml, IOUtils.toString(xmlReader)); + } + + public void testHttpContent() throws Exception { + String encoding = "UTF-8"; + String xml = getXML("no-bom", XML3, encoding, encoding); + ByteArrayInputStream is = new ByteArrayInputStream(xml.getBytes(encoding)); + XmlStreamReader xmlReader = new XmlStreamReader(is, encoding); + assertEquals("Check encoding", xmlReader.getEncoding(), encoding); + assertEquals("Check content", xml, IOUtils.toString(xmlReader)); + 
} + public void _testAlternateDefaultEncoding(String cT, String bomEnc, String streamEnc, String prologEnc, String alternateEnc) throws Exception { @@ -345,13 +366,11 @@ public class XmlStreamReaderTest extends if (bom == null) { bom = new int[0]; } - MessageFormat xml = XMLs.get(xmlType); for (int i = 0; i < bom.length; i++) { baos.write(bom[i]); } Writer writer = new OutputStreamWriter(baos, streamEnc); - String info = INFO.format(new Object[] { bomType, xmlType, prologEnc }); - String xmlDoc = xml.format(new Object[] { streamEnc, prologEnc, info }); + String xmlDoc = getXML(bomType, xmlType, streamEnc, prologEnc); writer.write(xmlDoc); // PADDDING TO TEST THINGS WORK BEYOND PUSHBACK_SIZE @@ -364,4 +383,15 @@ public class XmlStreamReaderTest extends writer.close(); return new ByteArrayInputStream(baos.toByteArray()); } + + /** + * Create the XML. + */ + private String getXML(String bomType, String xmlType, + String streamEnc, String prologEnc) { + MessageFormat xml = XMLs.get(xmlType); + String info = INFO.format(new Object[] { bomType, xmlType, prologEnc }); + String xmlDoc = xml.format(new Object[] { streamEnc, prologEnc, info }); + return xmlDoc; + } }