This patch fixes the problems encountered when parsing larger XML files, and
adds some small validation features and fixes. The parser has now been tested
successfully with a number of large XML files and external general entities,
so I'm reverting the service description for the SAX parser factory back to
gnu.xml.stream.SAXParserFactory.

2005-12-27  Chris Burdess  <[EMAIL PROTECTED]>

        * gnu/java/net/CRLFInputStream.java,
          gnu/xml/stream/SAXParser.java,
          gnu/xml/stream/XMLParser.java,
          resource/META-INF/services/javax.xml.parsers.SAXParserFactory:
          Fix XML parser stream issues. Add support for ignorable whitespace
          to SAX parser. Allow validating parser to parse invalid files.
        * gnu/xml/stream/BufferedReader.java,
          gnu/xml/stream/XMLInputStreamReader.java: Move buffering
          functionality of XMLInputStreamReader to BufferedReader.

-- 
Chris Burdess
  "They that can give up essential liberty to obtain a little safety
  deserve neither liberty nor safety." - Benjamin Franklin
Index: resource/META-INF/services/javax.xml.parsers.SAXParserFactory
===================================================================
RCS file: /cvsroot/classpath/classpath/resource/META-INF/services/javax.xml.parsers.SAXParserFactory,v
retrieving revision 1.3
diff -u -r1.3 javax.xml.parsers.SAXParserFactory
--- resource/META-INF/services/javax.xml.parsers.SAXParserFactory       25 Dec 2005 09:49:42 -0000      1.3
+++ resource/META-INF/services/javax.xml.parsers.SAXParserFactory       27 Dec 2005 19:42:43 -0000
@@ -1 +1 @@
-gnu.xml.aelfred2.JAXPFactory
+gnu.xml.stream.SAXParserFactory
Index: gnu/java/net/CRLFInputStream.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/java/net/CRLFInputStream.java,v
retrieving revision 1.4
diff -u -r1.4 CRLFInputStream.java
--- gnu/java/net/CRLFInputStream.java   2 Jul 2005 20:32:13 -0000       1.4
+++ gnu/java/net/CRLFInputStream.java   27 Dec 2005 19:42:43 -0000
@@ -128,7 +128,7 @@
             in.reset();
             if (i != -1)
               {
-                l = in.read(b, off, i + 1); // read to CR
+                l = in.read(b, off, (i + 1) - off); // read to CR
                 in.read(); // skip LF
                 b[i] = LF; // fix CR as LF
               }
Index: gnu/xml/stream/BufferedReader.java
===================================================================
RCS file: gnu/xml/stream/BufferedReader.java
diff -N gnu/xml/stream/BufferedReader.java
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ gnu/xml/stream/BufferedReader.java  27 Dec 2005 19:42:43 -0000
@@ -0,0 +1,208 @@
+/* BufferedReader.java -- 
+   Copyright (C) 2005  Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.xml.stream;
+
+import java.io.IOException;
+import java.io.Reader;
+
+/**
+ * A mark-capable buffered reader.
+ *
+ * @author <a href='mailto:[EMAIL PROTECTED]'>Chris Burdess</a>
+ */
+class BufferedReader
+  extends Reader
+{
+
+  static final int DEFAULT_BUFFER_SIZE = 4096;
+
+  final Reader in;
+  char[] buf;
+  int pos, count, markpos, marklimit, bufferSize;
+
+  BufferedReader(Reader in)
+  {
+    this(in, DEFAULT_BUFFER_SIZE);
+  }
+         
+  BufferedReader(Reader in, int bufferSize)
+  {
+    if (bufferSize < 1)
+      throw new IllegalArgumentException();
+    this.in = in;
+    this.bufferSize = bufferSize;
+    buf = new char[bufferSize];
+    pos = count = bufferSize;
+  }
+
+  public void close()
+    throws IOException
+  {
+    buf = null;
+    pos = count = 0;
+    markpos = -1;
+    in.close();
+  }
+
+  public void mark(int readlimit)
+    throws IOException
+  {
+    marklimit = readlimit;
+    markpos = pos;
+    //System.out.println("--mark@"+Integer.toHexString(pos)+":"+marklimit);
+  }
+
+  public boolean markSupported()
+  {
+    return true;
+  }
+
+  public int read()
+    throws IOException
+  {
+    if (pos >= count && !refill())
+      return -1;
+    //System.out.println("--read1@"+Integer.toHexString(pos)+":"+new String(buf, pos, 1));
+    return (int) buf[pos++];
+  }
+
+  public int read(char[] b)
+    throws IOException
+  {
+    return read(b, 0, b.length);
+  }
+
+  public int read(char[] b, int off, int len)
+    throws IOException
+  {
+    if (off < 0 || len < 0 || b.length - off < len)
+      throw new IndexOutOfBoundsException();
+    
+    if (len == 0)
+      return 0;
+    
+    if (pos >= count && !refill())
+      return -1;
+                
+    int ret = Math.min(count - pos, len);
+    System.arraycopy(buf, pos, b, off, ret);
+    //System.out.println("--read2@"+Integer.toHexString(pos)+":"+new String(b, off, ret)+" ("+ret+")");
+    pos += ret;
+    off += ret;
+    len -= ret;
+    
+    while (len > 0 && refill())
+      {
+        int remain = Math.min(count - pos, len);
+        System.arraycopy(buf, pos, b, off, remain);
+        //System.out.println("--read3@"+Integer.toHexString(pos)+":"+new String(b, off, remain));
+        pos += remain;
+        off += remain;
+        len -= remain;
+        ret += remain;
+      }
+    
+    return ret;
+  }
+
+  public void reset()
+    throws IOException
+  {
+    if (markpos == -1)
+      throw new IOException(buf == null ? "Stream closed." : "Invalid mark.");
+    pos = markpos;
+    //System.out.println("--reset@"+Integer.toHexString(pos));
+  }
+
+  public long skip(long n)
+    throws IOException
+  {
+    if (buf == null)
+      throw new IOException("Stream closed.");
+    //System.out.println("--skip:"+n);
+    final long origN = n;
+    while (n > 0L)
+      {
+        if (pos >= count && !refill())
+          break;
+        int numread = (int) Math.min((long) (count - pos), n);
+        pos += numread;
+        n -= numread;
+      }
+    return origN - n;
+  }
+
+  private boolean refill()
+    throws IOException
+  {
+    if (buf == null)
+      throw new IOException("Stream closed.");
+
+    //System.out.println("--refill:pos="+Integer.toHexString(pos)+" count="+Integer.toHexString(count));
+    int markcount = count - markpos;
+    if (markpos == -1 || markcount >= marklimit)
+      {
+        markpos = -1;
+        pos = count = 0;
+        //System.out.println("--refill1@"+Integer.toHexString(pos));
+      }
+    else
+      {
+        char[] newbuf = buf;
+        if (markpos < bufferSize)
+          { 
+            newbuf = new char[count - markpos + bufferSize];
+          }
+        System.arraycopy(buf, markpos, newbuf, 0, markcount);
+        buf = newbuf;
+        count = markcount;
+        pos -= markpos;
+        markpos = 0;
+        //System.out.println("--refill2@"+Integer.toHexString(pos)+":"+Integer.toHexString(count));
+      }
+
+    int numread = in.read(buf, count, bufferSize);
+    if (numread <= 0)
+      return false;
+
+    //System.out.println("--refill3("+Integer.toHexString(numread)+"):"+new String(buf, count, numread));
+    count += numread;
+    return true;
+  }
+
+}
Index: gnu/xml/stream/SAXParser.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/xml/stream/SAXParser.java,v
retrieving revision 1.7
diff -u -r1.7 SAXParser.java
--- gnu/xml/stream/SAXParser.java       24 Dec 2005 17:56:32 -0000      1.7
+++ gnu/xml/stream/SAXParser.java       27 Dec 2005 19:42:43 -0000
@@ -357,8 +357,10 @@
                 if (contentHandler != null)
                   {
                     char[] b = reader.getTextCharacters();
-                    // TODO determine whether whitespace is ignorable
-                    contentHandler.characters(b, 0, b.length);
+                    if (isIgnorableWhitespace(parser, b, false))
+                      contentHandler.ignorableWhitespace(b, 0, b.length);
+                    else
+                      contentHandler.characters(b, 0, b.length);
                   }
                 break;
               case XMLStreamConstants.CDATA:
@@ -367,8 +369,10 @@
                 if (contentHandler != null)
                   {
                     char[] b = reader.getTextCharacters();
-                    // TODO determine whether whitespace and ignorable
-                    contentHandler.characters(b, 0, b.length);
+                    if (isIgnorableWhitespace(parser, b, true))
+                      contentHandler.ignorableWhitespace(b, 0, b.length);
+                    else
+                      contentHandler.characters(b, 0, b.length);
                   }
                 if (lexicalHandler != null)
                   lexicalHandler.endCDATA();
@@ -619,6 +623,31 @@
       }
   }
 
+  private boolean isIgnorableWhitespace(XMLParser reader, char[] b,
+                                        boolean testCharacters)
+  {
+    XMLParser.Doctype doctype = reader.doctype;
+    if (doctype == null)
+      return false;
+    String currentElement = reader.getCurrentElement();
+    XMLParser.ContentModel model = doctype.getElementModel(currentElement);
+    if (model == null || model.type != XMLParser.ContentModel.ELEMENT)
+      return false;
+    boolean white = true;
+    if (testCharacters)
+      {
+        for (int i = 0; i < b.length; i++)
+          {
+            if (b[i] != ' ' && b[i] != '\t' && b[i] != '\n' && b[i] != '\r')
+              {
+                white = false;
+                break;
+              }
+          }
+      }
+    return white;
+  }
+
   public void parse(String systemId)
     throws IOException, SAXException
   {
Index: gnu/xml/stream/XMLInputStreamReader.java
===================================================================
RCS file: gnu/xml/stream/XMLInputStreamReader.java
diff -N gnu/xml/stream/XMLInputStreamReader.java
--- gnu/xml/stream/XMLInputStreamReader.java    12 Dec 2005 11:35:38 -0000      1.1
+++ /dev/null   1 Jan 1970 00:00:00 -0000
@@ -1,211 +0,0 @@
-/* XMLInputStreamReader.java -- 
-   Copyright (C) 2005  Free Software Foundation, Inc.
-
-This file is part of GNU Classpath.
-
-GNU Classpath is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU Classpath is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU Classpath; see the file COPYING.  If not, write to the
-Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-02110-1301 USA.
-
-Linking this library statically or dynamically with other modules is
-making a combined work based on this library.  Thus, the terms and
-conditions of the GNU General Public License cover the whole
-combination.
-
-As a special exception, the copyright holders of this library give you
-permission to link this library with independent modules to produce an
-executable, regardless of the license terms of these independent
-modules, and to copy and distribute the resulting executable under
-terms of your choice, provided that you also meet, for each linked
-independent module, the terms and conditions of the license of that
-module.  An independent module is a module which is not derived from
-or based on this library.  If you modify this library, you may extend
-this exception to your version of the library, but you are not
-obligated to do so.  If you do not wish to do so, delete this
-exception statement from your version. */
-
-package gnu.xml.stream;
-
-import java.io.FilterReader;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.IOException;
-import java.io.Reader;
-import java.io.UnsupportedEncodingException;
-
-/**
- * A mark-capable reader that can copy its buffer state into another
- * instance with a different encoding.
- *
- * @author <a href='mailto:[EMAIL PROTECTED]'>Chris Burdess</a>
- */
-class XMLInputStreamReader
-  extends FilterReader
-{
-
-  final InputStream is;
-  char[] buf;
-  int pos, count, markpos, marklimit, bufferSize;
-
-  XMLInputStreamReader(InputStream is, String encoding)
-    throws UnsupportedEncodingException
-  {
-    super(new InputStreamReader(is, encoding));
-    this.is = is;
-    final int size = 2048;
-    buf = new char[size];
-    pos = count = bufferSize = size;
-  }
-
-  XMLInputStreamReader(XMLInputStreamReader reader, String encoding)
-    throws UnsupportedEncodingException
-  {
-    this(reader.is, encoding);
-    buf = reader.buf;
-    pos = reader.pos;
-    count = reader.count;
-    markpos = reader.markpos;
-    marklimit = reader.marklimit;
-    bufferSize = reader.bufferSize;
-  }
-
-  public void close()
-    throws IOException
-  {
-    buf = null;
-    pos = count = 0;
-    markpos = -1;
-    super.close();
-  }
-
-  public void mark(int readlimit)
-    throws IOException
-  {
-    marklimit = readlimit;
-    markpos = pos;
-  }
-
-  public boolean markSupported()
-  {
-    return true;
-  }
-
-  public int read()
-    throws IOException
-  {
-    if (pos >= count && !refill())
-      return -1;
-    //System.out.println("read1:"+new String(buf, pos, 1));
-    return (int) buf[pos++];
-  }
-
-  public int read(char[] b)
-    throws IOException
-  {
-    return read(b, 0, b.length);
-  }
-
-  public int read(char[] b, int off, int len)
-    throws IOException
-  {
-    if (off < 0 || len < 0 || b.length - off < len)
-      throw new IndexOutOfBoundsException();
-    
-    if (len == 0)
-      return 0;
-    
-    if (pos >= count && !refill())
-      return -1;
-                
-    int ret = Math.min(count - pos, len);
-    System.arraycopy(buf, pos, b, off, ret);
-    //System.out.println("read2:"+new String(b, off, ret));
-    pos += ret;
-    off += ret;
-    len -= ret;
-    
-    while (len > 0 && refill())
-      {
-        int remain = Math.min(count - pos, len);
-        System.arraycopy(buf, pos, b, off, remain);
-        //System.out.println("read3:"+new String(b, off, remain));
-        pos += remain;
-        off += remain;
-        len -= remain;
-        ret += remain;
-      }
-    
-    return ret;
-  }
-
-  public void reset()
-    throws IOException
-  {
-    if (markpos == -1)
-      throw new IOException(buf == null ? "Stream closed." : "Invalid mark.");
-    pos = markpos;
-  }
-
-  public long skip(long n)
-    throws IOException
-  {
-    if (buf == null)
-      throw new IOException("Stream closed.");
-    final long origN = n;
-    while (n > 0L)
-      {
-        if (pos >= count && !refill())
-          break;
-        int numread = (int) Math.min((long) (count - pos), n);
-        pos += numread;
-        n -= numread;
-      }
-    return origN - n;
-  }
-
-  private boolean refill()
-    throws IOException
-  {
-    if (buf == null)
-      throw new IOException("Stream closed.");
-
-    if (markpos == -1 || count - markpos >= marklimit)
-      {
-        markpos = -1;
-        pos = count = 0;
-      }
-    else
-      {
-        char[] newbuf = buf;
-        if (markpos < bufferSize)
-          { 
-            newbuf = new char[count - markpos + bufferSize];
-          }
-        System.arraycopy(buf, markpos, newbuf, 0, count - markpos);
-        buf = newbuf;
-        count -= markpos;
-        pos -= markpos;
-        markpos = 0;
-      }
-
-    int numread = super.read(buf, count, bufferSize);
-
-    if (numread <= 0)
-      return false;
-
-    count += numread;
-    return true;
-  }
-
-}
Index: gnu/xml/stream/XMLParser.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/xml/stream/XMLParser.java,v
retrieving revision 1.11
diff -u -r1.11 XMLParser.java
--- gnu/xml/stream/XMLParser.java       24 Dec 2005 17:56:32 -0000      1.11
+++ gnu/xml/stream/XMLParser.java       27 Dec 2005 19:42:44 -0000
@@ -415,8 +415,6 @@
 
   public String getEncoding()
   {
-    if (input.forceReader)
-      return null;
     return (input.inputEncoding == null) ? "UTF-8" : input.inputEncoding;
   }
 
@@ -838,7 +836,7 @@
                   {
                     reset();
                     event = readCharData(null);
-                    if (validating)
+                    if (validating && doctype != null)
                       validatePCData(buf.toString());
                   }
               }
@@ -849,12 +847,13 @@
             buf.append(elementName);
             state = stack.isEmpty() ? MISC : CONTENT;
             event = XMLStreamConstants.END_ELEMENT;
-            if (validating)
+            if (validating && doctype != null)
               endElementValidationHook();
             break;
           case INIT: // XMLDecl?
             if (tryRead(TEST_XML_DECL))
               readXMLDecl();
+            input.finalizeEncoding();
             event = XMLStreamConstants.START_DOCUMENT;
             state = PROLOG;
             break;
@@ -922,6 +921,13 @@
       }
   }
 
+  // package private
+
+  String getCurrentElement()
+  {
+    return (String) stack.getLast();
+  }
+
   // private
 
   private void mark(int limit)
@@ -989,21 +995,24 @@
     char[] chars = delim.toCharArray();
     int len = chars.length;
     mark(len);
-    int l2 = read(tmpBuf, 0, len);
-    if (l2 != len)
+    int off = 0;
+    do
       {
-        reset();
-        error("EOF before required string", delim);
+        int l2 = read(tmpBuf, off, len - off);
+        if (l2 == -1)
+          {
+            reset();
+            error("EOF before required string", delim);
+          }
+        off += l2;
       }
-    else
+    while (off < len);
+    for (int i = 0; i < chars.length; i++)
       {
-        for (int i = 0; i < chars.length; i++)
+        if (chars[i] != tmpBuf[i])
           {
-            if (chars[i] != tmpBuf[i])
-              {
-                reset();
-                error("required string", delim);
-              }
+            reset();
+            error("required string", delim);
           }
       }
   }
@@ -1035,12 +1044,25 @@
     char[] chars = test.toCharArray();
     int len = chars.length;
     mark(len);
+    int count = 0;
     int l2 = read(tmpBuf, 0, len);
-    if (l2 < len)
+    if (l2 == -1)
       {
         reset();
         return false;
       }
+    count += l2;
+    while (count < len)
+      {
+        // force read
+        int c = read();
+        if (c == -1)
+          {
+            reset();
+            return false;
+          }
+        tmpBuf[count++] = (char) c;
+      }
     for (int i = 0; i < len; i++)
       {
         if (chars[i] != tmpBuf[i])
@@ -1227,6 +1249,7 @@
         input.init();
         if (tryRead(TEST_XML_DECL))
           readTextDecl();
+        input.finalizeEncoding();
       }
     //System.out.println("pushInput "+name+" "+url);
   }
@@ -1360,10 +1383,9 @@
     require("encoding");
     readEq();
     String enc = readLiteral(flags);
-    if (!input.forceReader)
-      input.setInputEncoding(enc);
     skipWhitespace();
     require("?>");
+    input.setInputEncoding(enc);
   }
 
   /**
@@ -1393,8 +1415,6 @@
           error("whitespace required before 'encoding='");
         readEq();
         xmlEncoding = readLiteral(flags);
-        if (!input.forceReader)
-          input.setInputEncoding(xmlEncoding);
         white = tryWhitespace();
       }
     
@@ -1414,6 +1434,8 @@
 
     skipWhitespace();
     require("?>");
+    if (xmlEncoding != null)
+      input.setInputEncoding(xmlEncoding);
   }
 
   /**
@@ -2147,7 +2169,7 @@
               error("unbound attribute prefix", attr.prefix);
           }
       }
-    if (validating)
+    if (validating && doctype != null)
       {
         validateStartElement(elementName);
         currentContentModel = doctype.getElementModel(elementName);
@@ -2283,7 +2305,7 @@
     // Make element name available
     buf.setLength(0);
     buf.append(expected);
-    if (validating)
+    if (validating && doctype != null)
       endElementValidationHook();
   }
 
@@ -3309,8 +3331,6 @@
   {
     if (currentContentModel == null)
       return; // root element
-    if (doctype == null)
-      error("document does not specify a DTD");
     switch (currentContentModel.type)
       {
       case ContentModel.EMPTY:
@@ -3869,7 +3889,7 @@
     
     InputStream in;
     Reader reader;
-    boolean forceReader, initialized;
+    boolean initialized;
     String inputEncoding;
     boolean xml11;
 
@@ -3891,38 +3911,25 @@
     }
     
     Input(InputStream in, Reader reader, String publicId, String systemId,
-          String name, String defaultEncoding)
+          String name, String inputEncoding)
     {
-      if (defaultEncoding == null)
-        defaultEncoding = "UTF-8";
-      if (in != null && !in.markSupported())
-        in = new BufferedInputStream(in);
-      this.in = in;
+      if (inputEncoding == null)
+        inputEncoding = "UTF-8";
+      this.inputEncoding = inputEncoding;
       this.publicId = publicId;
       this.systemId = systemId;
       this.name = name;
-      if (reader == null)
+      if (in != null)
         {
-          try
-            {
-              in = new CRLFInputStream(in);
-              reader = new XMLInputStreamReader(in, defaultEncoding);
-            }
-          catch (UnsupportedEncodingException e)
-            {
-              RuntimeException e2 =
-                new RuntimeException(defaultEncoding +
-                                     " charset not supported");
-              e2.initCause(e);
-              throw e2;
-            }
+          if (reader != null)
+            throw new IllegalStateException("both byte and char streams "+
+                                            "specified");
+          in = new CRLFInputStream(in);
+          in = new BufferedInputStream(in);
+          this.in = in;
         }
       else
-        {
-          forceReader = true;
-          reader = new CRLFReader(reader);
-        }
-      this.reader = reader;
+        this.reader = new CRLFReader(reader);
       initialized = false;
     }
 
@@ -3953,7 +3960,7 @@
     {
       if (initialized)
         return;
-      if (!forceReader && in != null)
+      if (in != null)
         detectEncoding();
       initialized = true;
     }
@@ -3965,7 +3972,10 @@
       markOffset = offset;
       markLine = line;
       markColumn = column;
-      reader.mark(len);
+      if (reader != null)
+        reader.mark(len);
+      else
+        in.mark(len);
     }
 
     /**
@@ -3975,9 +3985,10 @@
       throws IOException
     {
       offset++;
-      int ret = reader.read();
-      //System.out.println("read1:"+((char) ret));
-      if (ret == 0x0d || (xml11 && ret == 0x85))
+      int ret = (reader != null) ? reader.read() : in.read();
+      //if (ret != -1)
+      //  System.out.println("  read1:"+((char) ret));
+      if (ret == 0x0d || (xml11 && (ret == 0x85 || ret == 0x2028)))
         ret = 0x0a;
       if (ret == 0x0a)
         {
@@ -3995,14 +4006,28 @@
     int read(char[] b, int off, int len)
       throws IOException
     {
-      int ret = reader.read(b, off, len);
+      int ret;
+      if (reader != null)
+        ret = reader.read(b, off, len);
+      else
+        {
+          byte[] b2 = new byte[len];
+          ret = in.read(b2, 0, len);
+          if (ret != -1)
+            {
+              String s = new String(b2, 0, ret, inputEncoding);
+              char[] c = s.toCharArray();
+              ret = c.length;
+              System.arraycopy(c, 0, b, off, ret);
+            }
+        }
       if (ret != -1)
         {
-          //System.out.println("read:"+new String(b, off, ret));
+          //System.out.println("  read:"+new String(b, off, ret));
           for (int i = 0; i < ret; i++)
             {
               char c = b[off + i];
-              if (c == 0x0d || (xml11 && c == 0x85))
+              if (c == 0x0d || (xml11 && (c == 0x85 || c == 0x2028)))
                 {
                   c = 0x0a;
                   b[off + i] = c;
@@ -4023,7 +4048,10 @@
       throws IOException
     {
       //System.out.println("  reset");
-      reader.reset();
+      if (reader != null)
+        reader.reset();
+      else
+        in.reset();
       offset = markOffset;
       line = markLine;
       column = markColumn;
@@ -4066,33 +4094,46 @@
 
       // 4-byte encodings
       if (equals(SIGNATURE_UCS_4_1234, signature))
-        setInputEncoding("UTF-32BE");
+        {
+          in.read();
+          in.read();
+          in.read();
+          in.read();
+          setInputEncoding("UTF-32BE");
+        }
       else if (equals(SIGNATURE_UCS_4_4321, signature))
-        setInputEncoding("UTF-32LE");
+        {
+          in.read();
+          in.read();
+          in.read();
+          in.read();
+          setInputEncoding("UTF-32LE");
+        }
       else if (equals(SIGNATURE_UCS_4_2143, signature) ||
                equals(SIGNATURE_UCS_4_3412, signature))
         throw new UnsupportedEncodingException("unsupported UCS-4 byte ordering");
+      
       // 2-byte encodings
       else if (equals(SIGNATURE_UCS_2_12, signature))
         {
-          setInputEncoding("UTF-16BE");
           in.read();
           in.read();
+          setInputEncoding("UTF-16BE");
         }
       else if (equals(SIGNATURE_UCS_2_21, signature))
         {
-          setInputEncoding("UTF-16LE");
           in.read();
           in.read();
+          setInputEncoding("UTF-16LE");
         }
       else if (equals(SIGNATURE_UCS_2_12_NOBOM, signature))
         {
-          setInputEncoding("UTF-16BE");
+          //setInputEncoding("UTF-16BE");
           throw new UnsupportedEncodingException("no byte-order mark for UCS-2 entity");
         }
       else if (equals(SIGNATURE_UCS_2_21_NOBOM, signature))
         {
-          setInputEncoding("UTF-16LE");
+          //setInputEncoding("UTF-16LE");
           throw new UnsupportedEncodingException("no byte-order mark for UCS-2 entity");
         }
       // ASCII-derived encodings
@@ -4102,10 +4143,10 @@
         }
       else if (equals(SIGNATURE_UTF_8_BOM, signature))
         {
-          setInputEncoding("UTF-8");
           in.read();
           in.read();
           in.read();
+          setInputEncoding("UTF-8");
         }
     }
 
@@ -4119,37 +4160,27 @@
       return true;
     }
     
-    private void setInputEncoding(String encoding)
-      throws UnsupportedEncodingException
+    void setInputEncoding(String encoding)
+      throws IOException
     {
-      if (!encoding.equals(inputEncoding) &&
-          reader instanceof XMLInputStreamReader)
-        {
-          if (inputEncoding == "UTF-8" &&
-              (encoding.startsWith("UTF-16") ||
-               encoding.startsWith("UTF-32")))
-            throw new UnsupportedEncodingException("document is not in its " +
-                                                   "declared encoding");
-          inputEncoding = encoding;
-          reader = new XMLInputStreamReader((XMLInputStreamReader) reader,
-                                            encoding);
-        }
-      else
-        {
-          /*if (reporter != null)
-            {
-            try
-            {
-            reporter.report("unable to set input encoding '" + encoding +
-            "': input is specified as reader", "WARNING",
-            encoding, this);
-            }
-            catch (XMLStreamException e)
-            {
-          // Am I bothered?
-          }}*/
-          System.err.println("Can't set input encoding "+encoding);
-        }
+      if (encoding.equals(inputEncoding))
+        return;
+      if (reader != null)
+        throw new UnsupportedEncodingException("document is not in its " +
+                                               "declared encoding: " +
+                                               inputEncoding);
+      inputEncoding = encoding;
+      finalizeEncoding();
+    }
+
+    void finalizeEncoding()
+      throws IOException
+    {
+      if (reader != null)
+        return;
+      //reader = new XMLInputStreamReader(in, inputEncoding);
+      reader = new BufferedReader(new InputStreamReader(in, inputEncoding));
+      mark(1);
     }
 
   }

Attachment: pgpAY8CTH0JJ4.pgp
Description: PGP signature

_______________________________________________
Classpath-patches mailing list
Classpath-patches@gnu.org
http://lists.gnu.org/mailman/listinfo/classpath-patches

Reply via email to