Author: niallp
Date: Thu Dec 23 03:12:09 2010
New Revision: 1052161

URL: http://svn.apache.org/viewvc?rev=1052161&view=rev
Log:
IO-258 - Fix XmlStreamReader consuming the stream during encoding detection

Modified:
    commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
    commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java

Modified: 
commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
URL: 
http://svn.apache.org/viewvc/commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/XmlStreamReader.java?rev=1052161&r1=1052160&r2=1052161&view=diff
==============================================================================
--- 
commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
 (original)
+++ 
commons/proper/io/trunk/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
 Thu Dec 23 03:12:09 2010
@@ -201,8 +201,10 @@ public class XmlStreamReader extends Rea
      */
     public XmlStreamReader(InputStream is, boolean lenient, String 
defaultEncoding) throws IOException {
         this.defaultEncoding = defaultEncoding;
-        this.encoding = doRawStream(is, lenient);
-        this.reader = new InputStreamReader(is, encoding);
+        BOMInputStream bom = new BOMInputStream(new BufferedInputStream(is, 
BUFFER_SIZE), false, BOMS);
+        BOMInputStream pis = new BOMInputStream(bom, true, XML_GUESS_BYTES);
+        this.encoding = doRawStream(bom, pis, lenient);
+        this.reader = new InputStreamReader(pis, encoding);
     }
 
     /**
@@ -250,12 +252,14 @@ public class XmlStreamReader extends Rea
         boolean lenient = true;
         String contentType = conn.getContentType();
         InputStream is = conn.getInputStream();
+        BOMInputStream bom = new BOMInputStream(new BufferedInputStream(is, 
BUFFER_SIZE), false, BOMS);
+        BOMInputStream pis = new BOMInputStream(bom, true, XML_GUESS_BYTES);
         if (conn instanceof HttpURLConnection || contentType != null) {
-            this.encoding = doHttpStream(is, contentType, lenient);
+            this.encoding = doHttpStream(bom, pis, contentType, lenient);
         } else {
-            this.encoding = doRawStream(is, lenient);
+            this.encoding = doRawStream(bom, pis, lenient);
         }
-        this.reader = new InputStreamReader(is, encoding);
+        this.reader = new InputStreamReader(pis, encoding);
     }
 
     /**
@@ -317,8 +321,10 @@ public class XmlStreamReader extends Rea
     public XmlStreamReader(InputStream is, String httpContentType,
             boolean lenient, String defaultEncoding) throws IOException {
         this.defaultEncoding = defaultEncoding;
-        this.encoding = doHttpStream(is, httpContentType, lenient);
-        this.reader = new InputStreamReader(is, encoding);
+        BOMInputStream bom = new BOMInputStream(new BufferedInputStream(is, 
BUFFER_SIZE), false, BOMS);
+        BOMInputStream pis = new BOMInputStream(bom, true, XML_GUESS_BYTES);
+        this.encoding = doHttpStream(bom, pis, httpContentType, lenient);
+        this.reader = new InputStreamReader(pis, encoding);
     }
 
     /**
@@ -394,16 +400,15 @@ public class XmlStreamReader extends Rea
     /**
      * Process the raw stream.
      *
-     * @param is InputStream to create the reader from.
+     * @param bom BOMInputStream to detect byte order marks
+     * @param pis BOMInputStream to guess XML encoding
      * @param lenient indicates if the charset encoding detection should be
      *        relaxed.
      * @return the encoding to be used
      * @throws IOException thrown if there is a problem reading the stream.
      */
-    private String doRawStream(InputStream is, boolean lenient)
+    private String doRawStream(BOMInputStream bom, BOMInputStream pis, boolean 
lenient)
             throws IOException {
-        BOMInputStream bom = new BOMInputStream(new BufferedInputStream(is, 
BUFFER_SIZE), false, BOMS);
-        BOMInputStream pis = new BOMInputStream(bom, true, XML_GUESS_BYTES);
         String bomEnc      = bom.getBOMCharsetName();
         String xmlGuessEnc = pis.getBOMCharsetName();
         String xmlEnc = getXmlProlog(pis, xmlGuessEnc);
@@ -411,7 +416,7 @@ public class XmlStreamReader extends Rea
             return calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc);
         } catch (XmlStreamReaderException ex) {
             if (lenient) {
-                return doLenientDetection(null, is, ex);
+                return doLenientDetection(null, ex);
             } else {
                 throw ex;
             }
@@ -421,17 +426,16 @@ public class XmlStreamReader extends Rea
     /**
      * Process a HTTP stream.
      *
-     * @param is InputStream to create the reader from.
+     * @param bom BOMInputStream to detect byte order marks
+     * @param pis BOMInputStream to guess XML encoding
      * @param httpContentType The HTTP content type
      * @param lenient indicates if the charset encoding detection should be
      *        relaxed.
      * @return the encoding to be used
      * @throws IOException thrown if there is a problem reading the stream.
      */
-    private String doHttpStream(InputStream is, String httpContentType,
+    private String doHttpStream(BOMInputStream bom, BOMInputStream pis, String 
httpContentType,
             boolean lenient) throws IOException {
-        BOMInputStream bom = new BOMInputStream(new BufferedInputStream(is, 
BUFFER_SIZE), false, BOMS);
-        BOMInputStream pis = new BOMInputStream(bom, true, XML_GUESS_BYTES);
         String bomEnc      = bom.getBOMCharsetName();
         String xmlGuessEnc = pis.getBOMCharsetName();
         String xmlEnc = getXmlProlog(pis, xmlGuessEnc);
@@ -440,7 +444,7 @@ public class XmlStreamReader extends Rea
                     xmlGuessEnc, xmlEnc, lenient);
         } catch (XmlStreamReaderException ex) {
             if (lenient) {
-                return doLenientDetection(httpContentType, is, ex);
+                return doLenientDetection(httpContentType, ex);
             } else {
                 throw ex;
             }
@@ -452,12 +456,11 @@ public class XmlStreamReader extends Rea
      *
      * @param httpContentType content-type header to use for the resolution of
      *        the charset encoding.
-     * @param is the unconsumed InputStream
      * @param ex The thrown exception
      * @return the encoding
      * @throws IOException thrown if there is a problem reading the stream.
      */
-    private String doLenientDetection(String httpContentType, InputStream is,
+    private String doLenientDetection(String httpContentType,
             XmlStreamReaderException ex) throws IOException {
         if (httpContentType != null && 
httpContentType.startsWith("text/html")) {
             httpContentType = httpContentType.substring("text/html".length());

Modified: 
commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java
URL: 
http://svn.apache.org/viewvc/commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java?rev=1052161&r1=1052160&r2=1052161&view=diff
==============================================================================
--- 
commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java
 (original)
+++ 
commons/proper/io/trunk/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java
 Thu Dec 23 03:12:09 2010
@@ -26,6 +26,8 @@ import java.text.MessageFormat;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.commons.io.IOUtils;
+
 import junit.framework.TestCase;
 
 /**
@@ -220,6 +222,25 @@ public class XmlStreamReaderTest extends
                 "UTF-8", "UTF-8");
     }
 
+    
+    public void testRawContent() throws Exception {
+        String encoding = "UTF-8";
+        String xml = getXML("no-bom", XML3, encoding, encoding);
+        ByteArrayInputStream is = new 
ByteArrayInputStream(xml.getBytes(encoding));
+        XmlStreamReader xmlReader = new XmlStreamReader(is);
+        assertEquals("Check encoding", xmlReader.getEncoding(), encoding);
+        assertEquals("Check content", xml, IOUtils.toString(xmlReader));
+    }
+
+    public void testHttpContent() throws Exception {
+        String encoding = "UTF-8";
+        String xml = getXML("no-bom", XML3, encoding, encoding);
+        ByteArrayInputStream is = new 
ByteArrayInputStream(xml.getBytes(encoding));
+        XmlStreamReader xmlReader = new XmlStreamReader(is, encoding);
+        assertEquals("Check encoding", xmlReader.getEncoding(), encoding);
+        assertEquals("Check content", xml, IOUtils.toString(xmlReader));
+    }
+
     public void _testAlternateDefaultEncoding(String cT, String bomEnc,
             String streamEnc, String prologEnc, String alternateEnc)
             throws Exception {
@@ -345,13 +366,11 @@ public class XmlStreamReaderTest extends
         if (bom == null) {
             bom = new int[0];
         }
-        MessageFormat xml = XMLs.get(xmlType);
         for (int i = 0; i < bom.length; i++) {
             baos.write(bom[i]);
         }
         Writer writer = new OutputStreamWriter(baos, streamEnc);
-        String info = INFO.format(new Object[] { bomType, xmlType, prologEnc 
});
-        String xmlDoc = xml.format(new Object[] { streamEnc, prologEnc, info 
});
+        String xmlDoc = getXML(bomType, xmlType, streamEnc, prologEnc);
         writer.write(xmlDoc);
 
         // PADDING TO TEST THINGS WORK BEYOND PUSHBACK_SIZE
@@ -364,4 +383,15 @@ public class XmlStreamReaderTest extends
         writer.close();
         return new ByteArrayInputStream(baos.toByteArray());
     }
+
+    /**
+     * Create the XML.
+     */
+    private String getXML(String bomType, String xmlType,
+            String streamEnc, String prologEnc) {
+        MessageFormat xml = XMLs.get(xmlType);
+        String info = INFO.format(new Object[] { bomType, xmlType, prologEnc 
});
+        String xmlDoc = xml.format(new Object[] { streamEnc, prologEnc, info 
});
+        return xmlDoc;
+    }
 }


Reply via email to