/*
 * @(#)XMLStreamReader.java
 *
 * Copyright (c) 2000 Open3.org
 * All Rights Reserved.
 *
 * This software is licensed under the Open3.org Public License (OPL)
 * By using, modifying or distributing this software in source code or binary
 * format, you agree with the terms of the Open3.org Public License (OPL).
 *
 * For more information, see:
 *
 * 	http://www.open3.org/licenses/OPL.html
 *
 * OPEN3.ORG MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE
 * SUITABILITY OF THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT
 * LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
 * PARTICULAR PURPOSE, OR NON-INFRINGEMENT. OPEN3.ORG SHALL
 * NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING,
 * MODIFYING OR DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
 *
 * CopyrightVersion 1.00
 */
package org.open3.util;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;

/**
 * This class prevents two common implementation issues with most SAX XML
 * parsers. Most parsers will read data into a buffer and then parse the
 * buffer and repeat. Parsers also usually call the close method upon
 * a parse exception or a parse completion. Since XSTP wants each document
 * to be parsed as it comes in and also to leave the stream open (since closing
 * this closes the socket), this class should be used like a buffered reader.
 * <P>
 * To correct the first problem, the read method has been overridden to prevent
 * buffering beyond a '>' character. This ensures that the end of a document
 * will be noticed by most parsers.
 * <P>
 * To correct the second problem, the close method is overridden to ignore
 * stream closure requests using the close method. A new method, myClose,
 * has been added to allow closure at the appropriate time.
 * <P>
 * This class also contains a method to remove any data from the current stream
 * location up to a '<' character. This helps with another common issue in
 * parsers. Most require that the XML document start with XML immediately (no
 * spaces or newlines). This method can be called before calling the parser
 * to remove any invalid characters from the stream.
 *
 * <P><B>Modifications:</B>
 * <UL>
 *      <LI>08.10.2000 - DLS - Original code completed.
 *      <LI>10.29.2000 - DLS - Added comments and made code readability changes.
 *      <LI>04.19.2001 - DLS - Modified removeInvalid to throw IOException
 *                             when the end of the stream is reached.
 *      <LI>09.06.2001 - DLS - Added close, myClose, read(), read(ch[]), &
 *                             readLine(). Changed logic in read(ch[], int, int)
 *                             to handle arbitrary buffers (rather than Xerces
 *                             only). Changed name of class (was
 *                             XercesXMLStreamReader). Corrected comments.
 * </UL>
 * 
 * @author Duane L. Stoddard
 * @version $Revision: 1.2 $ $Date: 2001/05/23 04:53:56 $
 */
public class XMLStreamReader extends BufferedReader {
    /**
     * When true, every read method reports end-of-stream (-1 or null)
     * without touching the underlying reader. Changed through reportClose.
     * NOTE(review): the field is not volatile; if reportClose is called from
     * a thread other than the one reading, confirm that visibility is
     * guaranteed by the caller.
     */
    private boolean _reportClose = false;
    
    /**
     * This constructor passes the supplied reader to the parent class. A
     * default buffer size will be used.
     *
     * @param       in                  The supplied reader from which data is
     *                                  read.
     */
    public XMLStreamReader(Reader in) {
        super(in);
    }
    
    /**
     * This constructor passes the supplied reader and size to the parent
     * class.
     *
     * @param       in                  The supplied reader from which data is
     *                                  read.
     * @param       sz                  The size of the data buffer that will be
     *                                  used when reading data from the reader.
     */
    public XMLStreamReader(Reader in, int sz) {
        super(in, sz);
    }
    
    /**
     * Override the parent implementation to ignore all close requests. Since
     * some parsers call close upon an error or when finished processing,
     * this call should be ignored. The stream can't be closed since this closes
     * the underlying socket connection. Use myClose to really close the
     * stream.
     */
    public void close() throws IOException {
        // Intentionally empty: close requests are ignored, see myClose().
    }
    
    /**
     * This method is used to actually cause the stream to be closed. It
     * simply calls the parent close method.
     * 
     * @exception   IOException         if the parent throws this exception.
     */
    public void myClose() throws IOException {
        super.close();
    }
    
    /**
     * Enable/disable the reporting of a stream closure to calls to the read
     * methods. If report is set to true, all calls to read will simply return
     * a -1 (indication of no more data or stream end reached) and readLine
     * will return null. If it is set to false, the method calls will be
     * performed normally.
     * 
     * @param       report              enable/disable setting
     */
    public void reportClose(boolean report) {
        _reportClose = report;
    }
    
    /**
     * This function returns false; marking is not offered to callers of this
     * class.
     *
     * @return      false - marking the stream is not supported.
     */
    public boolean markSupported() {
        return false;
    }
    
    /**
     * This function always fails since mark is not supported.
     *
     * @param       length              The length to mark (ignored).
     * @exception   IOException         always, whenever this method is called.
     */
    public void mark(int length) throws IOException {
        throw new IOException("Operation (mark) not supported");
    }
    
    /**
     * This function always fails since reset is not supported.
     * 
     * @exception   IOException         always, whenever this method is called.
     */
    public void reset() throws IOException {
        throw new IOException("Operation (reset) not supported");
    }
    
    /**
     * Remove all the extra characters at the start of a data stream up to
     * (but not including) a '<' character. Only the specified number of
     * characters at the start of the stream are checked. The reportClose
     * setting is not consulted by this method.
     *
     * @param       limit               The maximum number of characters to check.
     *
     * @exception   IndexOutOfBoundsException if a '<' is not found in the
     *                                  first limit characters. The stream is
     *                                  repositioned to where it was before
     *                                  the call when this is thrown.
     * @exception   IOException         if the end of the stream is reached
     *                                  before a '<' is found (or some other
     *                                  internal error occurs while accessing
     *                                  the stream).
     */
    public void removeInvalid(int limit) throws IOException {
        // The parent's mark/read/reset are called directly: the overrides in
        // this class reject mark/reset and may fake an end-of-stream.
        super.mark(limit);
        
        // Count the characters that precede the first '<'.
        int leading = 0;
        boolean found = false;
        while(leading < limit) {
            int c = super.read();
            
            if(c == -1) {
                throw new IOException("End of stream reached");
            }
            if(c == '<') {
                found = true;
                break;
            }
            ++leading;
        }
        
        // Rewind to the marked position in both cases; on success only the
        // characters in front of the '<' are then discarded.
        super.reset();
        if(!found) {
            throw new IndexOutOfBoundsException("Character limit reached");
        }
        super.skip(leading);
    }
    
    /**
     * Override the parent implementation to report a stream closure (-1)
     * when appropriate.
     * 
     * @return      The character read, or -1 if the end of the stream was
     *              reached or a closure is being reported.
     * @exception   IOException         if the parent throws this exception.
     */
    public int read() throws IOException {
        if(_reportClose) {
            return -1;
        }
        return super.read();
    }
    
    /**
     * Override the parent implementation to report a stream closure (-1)
     * when appropriate. The parent converts this call into a read with
     * an offset and length so no other special processing is required.
     * 
     * @param       ch                  The caller's character buffer.
     * @return      The number of characters read, or -1 if the end of the
     *              stream was reached or a closure is being reported.
     * @exception   IOException         if the parent throws this exception.
     */
    public int read(char[] ch) throws IOException {
        if(_reportClose) {
            return -1;
        }
        return super.read(ch);
    }
    
    /**
     * Override the parent implementation to report a stream closure (null)
     * when appropriate. No other special processing is done. This method
     * is typically not used with parsers, but as long as each document
     * ends with a newline then this should be OK.
     * 
     * @return      The line read, or null if the end of the stream was
     *              reached or a closure is being reported.
     * @exception   IOException         if the parent throws this exception.
     */
    public String readLine() throws IOException {
        if(_reportClose) {
            return null;
        }
        return super.readLine();
    }
    
    /**
     * Read up to the specified number of characters into the supplied buffer.
     * Reading stops as soon as a '>' has been stored, the requested length
     * has been reached, or the parent reports that it is not ready to supply
     * another character. Stopping at '>' prevents most parsers from
     * overreading the data. This also causes most parsers to parse each
     * document as it comes in, which is important since a response is
     * usually required.
     *
     * @param       ch                  The caller's character buffer.
     * @param       off                 The offset in that buffer (start
     *                                  location).
     * @param       len                 The maximum length (number of chars).
     * @return      The number of characters read into the buffer, 0 if len
     *              is 0, or -1 if no more characters can be read or a
     *              closure is being reported.
     *
     * @exception   IndexOutOfBoundsException if off and len do not describe
     *                                  a valid region of ch.
     * @exception   IOException         if an IO error occurs.
     */
    public int read(char[] ch, int off, int len) throws IOException {
        // Check the specified bounds; (off + len) < 0 catches int overflow.
        if ((off < 0) || (off > ch.length) || (len < 0) ||
            ((off + len) > ch.length) || ((off + len) < 0)) {
            throw new IndexOutOfBoundsException();
        } else if (len == 0) {
            return 0;
        }
        
        // If we should report a closure, then do so
        if(_reportClose) {
            return -1;
        }
        
        // Read characters until the specified length is reached, a '>' is
        // found, or a blocking call would be required to read more. The
        // first character is always read with a (possibly blocking) read.
        int count = 0;
        int c;
        while((c = super.read()) != -1) {
            ch[off + count] = (char)c;
            ++count;
            if((count == len) || (c == '>') || !super.ready()) {
                break;
            }
        }
        
        return (count == 0) ? -1 : count;
    }
}

