mpoeschl 2003/01/18 05:40:03 Modified: src/java/uk/co/wilson/xml MinML.java Log: Update to MinML 1.7 Revision Changes Path 1.3 +768 -762 xml-rpc/src/java/uk/co/wilson/xml/MinML.java Index: MinML.java =================================================================== RCS file: /home/cvs/xml-rpc/src/java/uk/co/wilson/xml/MinML.java,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- MinML.java 14 Nov 2001 15:10:56 -0000 1.2 +++ MinML.java 18 Jan 2003 13:40:03 -0000 1.3 @@ -1,762 +1,768 @@ -// Copyright (c) 2000, 2001 The Wilson Partnership. -// All Rights Reserved. -// @(#)MinML.java, 1.6, 11th November 2001 -// Author: John Wilson - [EMAIL PROTECTED] - -package uk.co.wilson.xml; - -/* -Copyright (c) 2000, 2001 John Wilson ([EMAIL PROTECTED]). -All rights reserved. -Redistribution and use in source and binary forms, -with or without modification, are permitted provided -that the following conditions are met: - -Redistributions of source code must retain the above -copyright notice, this list of conditions and the -following disclaimer. - -Redistributions in binary form must reproduce the -above copyright notice, this list of conditions and -the following disclaimer in the documentation and/or -other materials provided with the distribution. - -All advertising materials mentioning features or use -of this software must display the following acknowledgement: - -This product includes software developed by John Wilson. -The name of John Wilson may not be used to endorse or promote -products derived from this software without specific prior -written permission. - -THIS SOFTWARE IS PROVIDED BY JOHN WILSON ``AS IS'' AND ANY -EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JOHN WILSON -BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -OF THE POSSIBILITY OF SUCH DAMAGE -*/ - -import java.io.InputStream; -import java.io.Reader; -import java.io.Writer; -import java.io.InputStreamReader; -import java.io.IOException; - -import java.net.URL; - -import java.util.Locale; -import java.util.Vector; -import java.util.Stack; -import java.util.EmptyStackException; - -import uk.org.xml.sax.Parser; -import uk.org.xml.sax.DocumentHandler; - -import org.xml.sax.EntityResolver; -import org.xml.sax.DTDHandler; -import org.xml.sax.ErrorHandler; -import org.xml.sax.Locator; -import org.xml.sax.InputSource; -import org.xml.sax.AttributeList; -import org.xml.sax.SAXException; -import org.xml.sax.SAXParseException; - -public class MinML implements Parser, Locator, DocumentHandler, ErrorHandler { - public static final int endStartName = 0; - public static final int emitStartElement = 1; - public static final int emitEndElement = 2; - public static final int emitCharacters = 3; - public static final int emitCharactersSave = 4; - public static final int saveAttributeName = 5; - public static final int saveAttributeValue = 6; - public static final int startComment = 7; - public static final int endComment = 8; - public static final int incLevel = 9; - public static final int decLevel = 10; - public static final int startCDATA = 11; - public static final int endCDATA = 12; - public static final int processCharRef = 13; - public static final int writeCdata = 14; - public static final int exitParser = 15; - public static final int parseError = 16; - public static final int discardAndChange = 17; - public static final int discardSaveAndChange = 18; - public static final int saveAndChange = 19; - public static final int change = 20; - - public static final int inSkipping = 0; - public static final int inSTag = 1; - public static final int inPossiblyAttribute = 2; - public static final int inNextAttribute = 3; - public static final int inAttribute = 4; - public static final int inAttribute1 = 5; - public static final int inAttributeValue = 6; - public static final int inAttributeQuoteValue = 7; - public static final int inAttributeQuotesValue = 8; - public static final int inETag = 9; - public static final int inETag1 = 10; - public static final int inMTTag = 11; - public static final int inTag = 12; - public static final int inTag1 = 13; - public static final int inPI = 14; - public static final int inPI1 = 15; - public static final int inPossiblySkipping = 16; - public static final int inCharData = 17; - public static final int inCDATA = 18; - public static final int inCDATA1 = 19; - public static final int inComment =20; - public static final int inDTD = 21; - - public MinML(final int initialBufferSize, final int bufferIncrement) { - this.initialBufferSize = initialBufferSize; - this.bufferIncrement = bufferIncrement; - } - - public MinML() { - this(256, 128); - } - - public void parse(final Reader in) throws SAXException, IOException { - final Vector attributeNames = new Vector(); - final Vector attributeValues = new Vector(); - - final AttributeList attrs = new AttributeList() { - public int getLength() { - return attributeNames.size(); - } - - public String getName(final int i) { - return (String)attributeNames.elementAt(i); - } - - public String getType(final int i) { - return "CDATA"; - } - - public String getValue(final int i) { - return (String)attributeValues.elementAt(i); - } - - public String getType(final String name) { - return "CDATA"; - } - - public String getValue(final String name) { - final int index = attributeNames.indexOf(name); - - return (index == -1) ? null : (String)attributeValues.elementAt(index); - } - }; - - final MinMLBuffer buffer = new MinMLBuffer(in); - int currentChar = 0, charCount = 0; - int level = 0; - int mixedContentLevel = -1; - String elementName = null; - String state = operands[inSkipping]; - - this.lineNumber = 1; - this.columnNumber = 0; - - try { - while(true) { - charCount++; - - // - // this is to try and make the loop a bit faster - // currentChar = buffer.read(); is simpler but is a bit slower. - // - currentChar = (buffer.nextIn == buffer.lastIn) ? buffer.read() : buffer.chars[buffer.nextIn++]; - - final int transition; - - if (currentChar > ']') { - transition = state.charAt(14); - } else { - final int charClass = charClasses[currentChar + 1]; - - if (charClass == -1) fatalError("Document contains illegal control character with value " + currentChar, this.lineNumber, this.columnNumber); - - if (charClass == 12) { - if (currentChar == '\r') { - currentChar = '\n'; - charCount = -1; - } - - if (currentChar == '\n') { - if (charCount == 0) continue; // preceeded by '\r' so ignore - - if (charCount != -1) charCount = 0; - - this.lineNumber++; - this.columnNumber = 0; - } - } - - transition = state.charAt(charClass); - } - - this.columnNumber++; - - String operand = operands[transition >>> 8]; - - switch (transition & 0XFF) { - case endStartName: - // end of start element name - elementName = buffer.getString(); - if (currentChar != '>' && currentChar != '/') break; // change state to operand - // drop through to emit start element (we have no attributes) - - case emitStartElement: - // emit start element - - final Writer newWriter = this.extDocumentHandler.startElement(elementName, attrs, - (this.tags.empty()) ? - this.extDocumentHandler.startDocument(buffer) - : - buffer.getWriter()); - - buffer.pushWriter(newWriter); - this.tags.push(elementName); - - attributeValues.removeAllElements(); - attributeNames.removeAllElements(); - - if (mixedContentLevel != -1) mixedContentLevel++; - - if (currentChar != '/') break; // change state to operand - - // <element/> drop through - - case emitEndElement: - // emit end element - - try { - final String begin = (String)this.tags.pop(); - - buffer.popWriter(); - elementName = buffer.getString(); - - if (currentChar != '/' && !elementName.equals(begin)) { - fatalError("end tag </" + elementName + "> does not match begin tag <" + begin + ">", - this.lineNumber, this.columnNumber); - } else { - this.documentHandler.endElement(begin); - - if (this.tags.empty()) { - this.documentHandler.endDocument(); - return; - } - } - } - catch (final EmptyStackException e) { - fatalError("end tag at begining of document", this.lineNumber, this.columnNumber); - } - - if (mixedContentLevel != -1 && --mixedContentLevel == 0) - operand = operands[inCharData]; - - break; // change state to operand - - case emitCharacters: - // emit characters - - buffer.flush(); - break; // change state to operand - - case emitCharactersSave: - // emit characters and save current character - - if (mixedContentLevel == -1) mixedContentLevel = 0; - - buffer.flush(); - - buffer.saveChar((char)currentChar); - - break; // change state to operand - - case saveAttributeName: - // save attribute name - - attributeNames.addElement(buffer.getString()); - break; // change state to operand - - case saveAttributeValue: - // save attribute value - - attributeValues.addElement(buffer.getString()); - break; // change state to operand - - case startComment: - // change state if we have found "<!--" - - if (buffer.read() != '-') continue; // not "<!--" - - break; // change state to operand - - case endComment: - // change state if we find "-->" - - if ((currentChar = buffer.read()) == '-') { - // deal with the case where we might have "------->" - while ((currentChar = buffer.read()) == '-'); - - if (currentChar == '>') break; // end of comment, change state to operand - } - - continue; // not end of comment, don't change state - - case incLevel: - - level++; - - break; - - case decLevel: - - if (level == 0) break; // outer level <> change state - - level--; - - continue; // in nested <>, don't change state - - case startCDATA: - // change state if we have found "<![CDATA[" - - if (buffer.read() != 'C') continue; // don't change state - if (buffer.read() != 'D') continue; // don't change state - if (buffer.read() != 'A') continue; // don't change state - if (buffer.read() != 'T') continue; // don't change state - if (buffer.read() != 'A') continue; // don't change state - if (buffer.read() != '[') continue; // don't change state - break; // change state to operand - - case endCDATA: - // change state if we find "]]>" - - if ((currentChar = buffer.read()) == ']') { - // deal with the case where we might have "]]]]]]]>" - while ((currentChar = buffer.read()) == ']') buffer.write(']'); - - if (currentChar == '>') break; // end of CDATA section, change state to operand - - buffer.write(']'); - } - - buffer.write(']'); - buffer.write(currentChar); - continue; // not end of CDATA section, don't change state - - case processCharRef: - // process character entity - - int crefState = 0; - - currentChar = buffer.read(); - - while (true) { - if ("#amp;&pos;'quot;\"gt;>lt;<".charAt(crefState) == currentChar) { - crefState++; - - if (currentChar == ';') { - buffer.write("#amp;&pos;'quot;\"gt;>lt;<".charAt(crefState)); - break; - - } else if (currentChar == '#') { - final int radix; - - currentChar = buffer.read(); - - if (currentChar == 'x') { - radix = 16; - currentChar = buffer.read(); - } else { - radix = 10; - } - - int charRef = Character.digit((char)currentChar, radix); - - while (true) { - currentChar = buffer.read(); - - final int digit = Character.digit((char)currentChar, radix); - - if (digit == -1) break; - - charRef = (char)((charRef * radix) + digit); - } - - if (currentChar == ';' && charRef != -1) { - buffer.write(charRef); - break; - } - - fatalError("invalid Character Entitiy", this.lineNumber, this.columnNumber); - } else { - currentChar = buffer.read(); - } - } else { - crefState = ("\u0001\u000b\u0006\u00ff\u00ff\u00ff\u00ff\u00ff\u00ff\u00ff\u00ff" + -// # a m p ; & p o s ; ' -// 0 1 2 3 4 5 6 7 8 9 a - "\u0011\u00ff\u00ff\u00ff\u00ff\u00ff\u0015\u00ff\u00ff\u00ff" + -// q u o t ; " g t ; > -// b b d e f 10 11 12 13 14 - "\u00ff\u00ff\u00ff").charAt(crefState); -// l t ; -// 15 16 17 - - if (crefState == 255) fatalError("invalid Character Entitiy", this.lineNumber, this.columnNumber); - } - } - - break; - - case parseError: - // report fatal error - - fatalError(operand, this.lineNumber, this.columnNumber); - // drop through to exit parser - - case exitParser: - // exit parser - - return; - - case writeCdata: - // write character data - // this will also write any skipped whitespace - - buffer.write(currentChar); - break; // change state to operand - - case discardAndChange: - // throw saved characters away and change state - - buffer.reset(); - break; // change state to operand - - case discardSaveAndChange: - // throw saved characters away, save character and change state - - buffer.reset(); - // drop through to save character and change state - - case saveAndChange: - // save character and change state - - buffer.saveChar((char)currentChar); - break; // change state to operand - - case change: - // change state to operand - - break; // change state to operand - } - - state = operand; - } - } - catch (final IOException e) { - this.errorHandler.fatalError(new SAXParseException(e.toString(), null, null, this.lineNumber, this.columnNumber, e)); - } - finally { - this.errorHandler = this; - this.documentHandler = this.extDocumentHandler = this; - this.tags.removeAllElements(); - } - } - - public void parse(final InputSource source) throws SAXException, IOException { - if (source.getCharacterStream() != null) - parse(source.getCharacterStream()); - else if (source.getByteStream() != null) - parse(new InputStreamReader(source.getByteStream())); - else - parse(new InputStreamReader(new URL(source.getSystemId()).openStream())); - } - - public void parse(final String systemId) throws SAXException, IOException { - parse(new InputSource(systemId)); - } - - public void setLocale(final Locale locale) throws SAXException { - throw new SAXException("Not supported"); - } - - public void setEntityResolver(final EntityResolver resolver) { - // not supported - } - - public void setDTDHandler(final DTDHandler handler) { - // not supported - } - - public void setDocumentHandler(final org.xml.sax.DocumentHandler handler) { - this.documentHandler = (handler == null) ? this : handler; - this.extDocumentHandler = this; - } - - public void setDocumentHandler(final DocumentHandler handler) { - this.documentHandler = this.extDocumentHandler = (handler == null) ? this : handler; - this.documentHandler.setDocumentLocator(this); - } - - public void setErrorHandler(final ErrorHandler handler) { - this.errorHandler = (handler == null) ? this : handler; - } - - public void setDocumentLocator(final Locator locator) { - } - - public void startDocument() throws SAXException { - } - - public Writer startDocument(final Writer writer) throws SAXException { - this.documentHandler.startDocument(); - return writer; - } - - public void endDocument() throws SAXException { - } - - public void startElement(final String name, final AttributeList attributes) throws SAXException { - } - - public Writer startElement(final String name, final AttributeList attributes, final Writer writer) - throws SAXException - { - this.documentHandler.startElement(name, attributes); - return writer; - } - - public void endElement(final String name) throws SAXException { - } - - public void characters(final char ch[], final int start, final int length) throws SAXException { - } - - public void ignorableWhitespace(final char ch[], final int start, final int length) throws SAXException { - } - - public void processingInstruction(final String target, final String data) throws SAXException { - } - - public void warning(final SAXParseException e) throws SAXException { - } - - public void error(final SAXParseException e) throws SAXException { - } - - public void fatalError(final SAXParseException e) throws SAXException { - throw e; - } - - public String getPublicId() { - return ""; - } - - - public String getSystemId() { - return ""; - } - - public int getLineNumber () { - return this.lineNumber; - } - - public int getColumnNumber () { - return this.columnNumber; - } - - private void fatalError(final String msg, final int lineNumber, final int columnNumber) throws SAXException { - this.errorHandler.fatalError(new SAXParseException(msg, null, null, lineNumber, columnNumber)); - } - - private class MinMLBuffer extends Writer { - public MinMLBuffer(final Reader in) { - this.in = in; - } - - public void close() throws IOException { - flush(); - } - - public void flush() throws IOException { - try { - _flush(); - if (writer != this) writer.flush(); - } - finally { - flushed = true; - } - } - - public void write(final int c) throws IOException { - written = true; - chars[count++] = (char)c; - } - - public void write(final char[] cbuf, final int off, final int len) throws IOException { - written = true; - System.arraycopy(cbuf, off, chars, count, len); - count += len; - } - - public void saveChar(final char c) { - written = false; - chars[count++] = c; - } - - public void pushWriter(final Writer writer) { - MinML.this.tags.push(this.writer); - - this.writer = (writer == null) ? this : writer; - - flushed = written = false; - } - - public Writer getWriter() { - return writer; - } - - public void popWriter() throws IOException { - try { - if (!flushed && writer != this) writer.flush(); - } - finally { - writer = (Writer)MinML.this.tags.pop(); - flushed = written = false; - } - } - - public String getString() { - final String result = new String(chars, 0, count); - - count = 0; - return result; - } - - public void reset() { - count = 0; - } - - public int read() throws IOException { - if (nextIn == lastIn) { - if (count != 0) { - if (written) { - _flush(); - } else if (count >= (chars.length - MinML.this.bufferIncrement)) { - final char[] newChars = new char[chars.length + MinML.this.bufferIncrement]; - - System.arraycopy(chars, 0, newChars, 0, count); - chars = newChars; - } - } - - final int numRead = in.read(chars, count, chars.length - count); - - if (numRead == -1) return -1; - - nextIn = count; - lastIn = count + numRead; - } - - return chars[nextIn++]; - } - - private void _flush() throws IOException { - if (count != 0) { - try { - if (writer == this) { - try { - MinML.this.documentHandler.characters(chars, 0, count); - } - catch (final SAXException e) { - throw new IOException(e.toString()); - } - } else { - writer.write(chars, 0, count); - } - } - finally { - count = 0; - } - } - } - - private int nextIn = 0, lastIn = 0; - private char[] chars = new char[MinML.this.initialBufferSize]; - private final Reader in; - private int count = 0; - private Writer writer = this; - private boolean flushed = false; - private boolean written = false; - } - - private DocumentHandler extDocumentHandler = this; - private org.xml.sax.DocumentHandler documentHandler = this; - private ErrorHandler errorHandler = this; - private final Stack tags = new Stack(); - private int lineNumber = 1; - private int columnNumber = 0; - private final int initialBufferSize; - private final int bufferIncrement; - - private static final byte[] charClasses = { - // EOF - 13, - // \t \n \r - -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 12, -1, -1, 12, -1, -1, - // - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - // SP ! " # $ % & ' ( ) * + , - . / - 12, 8, 7, 14, 14, 14, 3, 6, 14, 14, 14, 14, 14, 11, 14, 2, - // 0 1 2 3 4 5 6 7 8 9 : ; < = > ? - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 5, 1, 4, - // - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - // [ \ ] - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 9, 14, 10 - }; - - private static final String[] operands = { - "\u0d14\u1610\u1610\u1610\u1610\u1610\u1610\u1610\u1610\u1610\u1610\u1610\u0014\u000f\u1610", - "\u1710\u1000\u0b00\u1710\u1710\u1710\u1710\u1710\u1710\u1710\u1710\u0113\u0200\u1810\u0113", - "\u1710\u1001\u0b01\u1710\u1710\u1710\u1710\u1710\u1710\u1710\u1710\u1710\u0214\u1810\u0413", - "\u1710\u1001\u0b01\u1710\u1910\u1910\u1910\u1910\u1910\u1910\u1910\u1910\u0314\u1810\u0413", - "\u1910\u1910\u1910\u1910\u1910\u0605\u1910\u1910\u1910\u1910\u1910\u0413\u0514\u1810\u0413", - "\u1910\u1910\u1910\u1910\u1910\u0605\u1910\u1910\u1910\u1910\u1910\u1910\u0514\u1810\u1910", - "\u1a10\u1a10\u1a10\u1a10\u1a10\u1a10\u0714\u0814\u1a10\u1a10\u1a10\u1a10\u0614\u1810\u1a10", - "\u0713\u0713\u0713\u070d\u0713\u0713\u0306\u0713\u0713\u0713\u0713\u0713\u0713\u1810\u0713", - "\u0813\u0813\u0813\u080d\u0813\u0813\u0813\u0306\u0813\u0813\u0813\u0813\u0813\u1810\u0813", - "\u1710\u1002\u1710\u1710\u1710\u1710\u1710\u1710\u1710\u1710\u1710\u0913\u0914\u1810\u0913", - "\u1b10\u1b10\u0903\u1b10\u1b10\u1b10\u1b10\u1b10\u1214\u1b10\u1b10\u1b10\u1b10\u1810\u0104", - "\u1710\u1014\u1710\u1710\u1710\u1710\u1710\u1710\u1710\u1710\u1710\u1710\u1710\u1810\u1710", - "\u1710\u1c10\u0911\u1710\u0e11\u1710\u1710\u1710\u1211\u1710\u1710\u1710\u1710\u1810\u0112", - "\u1710\u1c10\u0911\u1710\u0e11\u1710\u1710\u1710\u1211\u1710\u1710\u1710\u1710\u1810\u0112", - "\u0e14\u0e14\u0e14\u0e14\u0f14\u0e14\u0e14\u0e14\u0e14\u0e14\u0e14\u0e14\u0e14\u1810\u0e14", - "\u0e14\u0014\u0e14\u0e14\u0f14\u0e14\u0e14\u0e14\u0e14\u0e14\u0e14\u0e14\u0e14\u1810\u0e14", - "\u0c14\u110e\u110e\u110d\u110e\u110e\u110e\u110e\u110e\u110e\u110e\u110e\u1013\u1810\u110e", - "\u0a14\u110e\u110e\u110d\u110e\u110e\u110e\u110e\u110e\u110e\u110e\u110e\u110e\u1810\u110e", - "\u1d10\u1d10\u1d10\u1d10\u1d10\u1d10\u1d10\u1d10\u1d10\u130b\u1d10\u1407\u1d10\u1810\u1514", - "\u130e\u130e\u130e\u130e\u130e\u130e\u130e\u130e\u130e\u130e\u110c\u130e\u130e\u1810\u130e", - "\u1414\u1414\u1414\u1414\u1414\u1414\u1414\u1414\u1414\u1414\u1414\u0008\u1414\u1810\u1414", - "\u1509\n\u1514\u1514\u1514\u1514\u1514\u1514\u1514\u1514\u1514\u1514\u1514\u1810\u1514", - "expected Element", - "unexpected character in tag", - "unexpected end of file found", - "attribute name not followed by '='", - "invalid attribute value", - "expecting end tag", - "empty tag", - "unexpected character after <!" - }; -} +// Copyright (c) 2000, 2001 The Wilson Partnership. +// All Rights Reserved. +// @(#)MinML.java, 1.7, 18th November 2001 +// Author: John Wilson - [EMAIL PROTECTED] + +package uk.co.wilson.xml; + +/* +Copyright (c) 2000, 2001 John Wilson ([EMAIL PROTECTED]). +All rights reserved. +Redistribution and use in source and binary forms, +with or without modification, are permitted provided +that the following conditions are met: + +Redistributions of source code must retain the above +copyright notice, this list of conditions and the +following disclaimer. + +Redistributions in binary form must reproduce the +above copyright notice, this list of conditions and +the following disclaimer in the documentation and/or +other materials provided with the distribution. + +All advertising materials mentioning features or use +of this software must display the following acknowledgement: + +This product includes software developed by John Wilson. +The name of John Wilson may not be used to endorse or promote +products derived from this software without specific prior +written permission. + +THIS SOFTWARE IS PROVIDED BY JOHN WILSON ``AS IS'' AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL JOHN WILSON +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +OF THE POSSIBILITY OF SUCH DAMAGE +*/ + +import java.io.InputStream; +import java.io.Reader; +import java.io.Writer; +import java.io.InputStreamReader; +import java.io.IOException; + +import java.net.URL; + +import java.util.Locale; +import java.util.Vector; +import java.util.Stack; +import java.util.EmptyStackException; + +import uk.org.xml.sax.Parser; +import uk.org.xml.sax.DocumentHandler; + +import org.xml.sax.EntityResolver; +import org.xml.sax.DTDHandler; +import org.xml.sax.ErrorHandler; +import org.xml.sax.Locator; +import org.xml.sax.InputSource; +import org.xml.sax.AttributeList; +import org.xml.sax.SAXException; +import org.xml.sax.SAXParseException; + +public class MinML implements Parser, Locator, DocumentHandler, ErrorHandler { + public static final int endStartName = 0; + public static final int emitStartElement = 1; + public static final int emitEndElement = 2; + public static final int possiblyEmitCharacters = 3; + public static final int emitCharacters = 4; + public static final int emitCharactersSave = 5; + public static final int saveAttributeName = 6; + public static final int saveAttributeValue = 7; + public static final int startComment = 8; + public static final int endComment = 9; + public static final int incLevel = 10; + public static final int decLevel = 11; + public static final int startCDATA = 12; + public static final int endCDATA = 13; + public static final int processCharRef = 14; + public static final int writeCdata = 15; + public static final int exitParser = 16; + public static final int parseError = 17; + public static final int discardAndChange = 18; + public static final int discardSaveAndChange = 19; + public static final int saveAndChange = 20; + public static final int change = 21; + + public static final int inSkipping = 0; + public static final int inSTag = 1; + public static final int inPossiblyAttribute = 2; + public static final int inNextAttribute = 3; + public static final int inAttribute = 4; + public static final int inAttribute1 = 5; + public static final int inAttributeValue = 6; + public static final int inAttributeQuoteValue = 7; + public static final int inAttributeQuotesValue = 8; + public static final int inETag = 9; + public static final int inETag1 = 10; + public static final int inMTTag = 11; + public static final int inTag = 12; + public static final int inTag1 = 13; + public static final int inPI = 14; + public static final int inPI1 = 15; + public static final int inPossiblySkipping = 16; + public static final int inCharData = 17; + public static final int inCDATA = 18; + public static final int inCDATA1 = 19; + public static final int inComment =20; + public static final int inDTD = 21; + + public MinML(final int initialBufferSize, final int bufferIncrement) { + this.initialBufferSize = initialBufferSize; + this.bufferIncrement = bufferIncrement; + } + + public MinML() { + this(256, 128); + } + + public void parse(final Reader in) throws SAXException, IOException { + final Vector attributeNames = new Vector(); + final Vector attributeValues = new Vector(); + + final AttributeList attrs = new AttributeList() { + public int getLength() { + return attributeNames.size(); + } + + public String getName(final int i) { + return (String)attributeNames.elementAt(i); + } + + public String getType(final int i) { + return "CDATA"; + } + + public String getValue(final int i) { + return (String)attributeValues.elementAt(i); + } + + public String getType(final String name) { + return "CDATA"; + } + + public String getValue(final String name) { + final int index = attributeNames.indexOf(name); + + return (index == -1) ? null : (String)attributeValues.elementAt(index); + } + }; + + final MinMLBuffer buffer = new MinMLBuffer(in); + int currentChar = 0, charCount = 0; + int level = 0; + int mixedContentLevel = -1; + String elementName = null; + String state = operands[inSkipping]; + + this.lineNumber = 1; + this.columnNumber = 0; + + try { + while(true) { + charCount++; + + // + // this is to try and make the loop a bit faster + // currentChar = buffer.read(); is simpler but is a bit slower. + // + currentChar = (buffer.nextIn == buffer.lastIn) ? buffer.read() : buffer.chars[buffer.nextIn++]; + + final int transition; + + if (currentChar > ']') { + transition = state.charAt(14); + } else { + final int charClass = charClasses[currentChar + 1]; + + if (charClass == -1) fatalError("Document contains illegal control character with value " + currentChar, this.lineNumber, this.columnNumber); + + if (charClass == 12) { + if (currentChar == '\r') { + currentChar = '\n'; + charCount = -1; + } + + if (currentChar == '\n') { + if (charCount == 0) continue; // preceeded by '\r' so ignore + + if (charCount != -1) charCount = 0; + + this.lineNumber++; + this.columnNumber = 0; + } + } + + transition = state.charAt(charClass); + } + + this.columnNumber++; + + final String operand = operands[transition >>> 8]; + + switch (transition & 0XFF) { + case endStartName: + // end of start element name + elementName = buffer.getString(); + if (currentChar != '>' && currentChar != '/') break; // change state to operand + // drop through to emit start element (we have no attributes) + + case emitStartElement: + // emit start element + + final Writer newWriter = this.extDocumentHandler.startElement(elementName, attrs, + (this.tags.empty()) ? + this.extDocumentHandler.startDocument(buffer) + : + buffer.getWriter()); + + buffer.pushWriter(newWriter); + this.tags.push(elementName); + + attributeValues.removeAllElements(); + attributeNames.removeAllElements(); + + if (mixedContentLevel != -1) mixedContentLevel++; + + if (currentChar != '/') break; // change state to operand + + // <element/> drop through + + case emitEndElement: + // emit end element + + try { + final String begin = (String)this.tags.pop(); + + buffer.popWriter(); + elementName = buffer.getString(); + + if (currentChar != '/' && !elementName.equals(begin)) { + fatalError("end tag </" + elementName + "> does not match begin tag <" + begin + ">", + this.lineNumber, this.columnNumber); + } else { + this.documentHandler.endElement(begin); + + if (this.tags.empty()) { + this.documentHandler.endDocument(); + return; + } + } + } + catch (final EmptyStackException e) { + fatalError("end tag at begining of document", this.lineNumber, this.columnNumber); + } + + if (mixedContentLevel != -1) --mixedContentLevel; + + break; // change state to operand + + case emitCharacters: + // emit characters + + buffer.flush(); + break; // change state to operand + + case emitCharactersSave: + // emit characters and save current character + + if (mixedContentLevel == -1) mixedContentLevel = 0; + + buffer.flush(); + + buffer.saveChar((char)currentChar); + + break; // change state to operand + + case possiblyEmitCharacters: + // write any skipped whitespace if in mixed content + + if (mixedContentLevel != -1) buffer.flush(); + break; // change state to operand + + case saveAttributeName: + // save attribute name + + attributeNames.addElement(buffer.getString()); + break; // change state to operand + + case saveAttributeValue: + // save attribute value + + attributeValues.addElement(buffer.getString()); + break; // change state to operand + + case startComment: + // change state if we have found "<!--" + + if (buffer.read() != '-') continue; // not "<!--" + + break; // change state to operand + + case endComment: + // change state if we find "-->" + + if ((currentChar = buffer.read()) == '-') { + // deal with the case where we might have "------->" + while ((currentChar = buffer.read()) == '-'); + + if (currentChar == '>') break; // end of comment, change state to operand + } + + continue; // not end of comment, don't change state + + case incLevel: + + level++; + + break; + + case decLevel: + + if (level == 0) break; // outer level <> change state + + level--; + + continue; // in nested <>, don't change state + + case startCDATA: + // change state if we have found "<![CDATA[" + + if (buffer.read() != 'C') continue; // don't change state + if (buffer.read() != 'D') continue; // don't change state + if (buffer.read() != 'A') continue; // don't change state + if (buffer.read() != 'T') continue; // don't change state + if (buffer.read() != 'A') continue; // don't change state + if (buffer.read() != '[') continue; // don't change state + break; // change state to operand + + case endCDATA: + // change state if we find "]]>" + + if ((currentChar = buffer.read()) == ']') { + // deal with the case where we might have "]]]]]]]>" + while ((currentChar = buffer.read()) == ']') buffer.write(']'); + + if (currentChar == '>') break; // end of CDATA section, change state to operand + + buffer.write(']'); + } + + buffer.write(']'); + buffer.write(currentChar); + continue; // not end of CDATA section, don't change state + + case processCharRef: + // process character entity + + int crefState = 0; + + currentChar = buffer.read(); + + while (true) { + if ("#amp;&pos;'quot;\"gt;>lt;<".charAt(crefState) == currentChar) { + crefState++; + + if (currentChar == ';') { + buffer.write("#amp;&pos;'quot;\"gt;>lt;<".charAt(crefState)); + break; + + } else if (currentChar == '#') { + final int radix; + + currentChar = buffer.read(); + + if (currentChar == 'x') { + radix = 16; + currentChar = buffer.read(); + } else { + radix = 10; + } + + int charRef = Character.digit((char)currentChar, radix); + + while (true) { + currentChar = buffer.read(); + + final int digit = Character.digit((char)currentChar, radix); + + if (digit == -1) break; + + charRef = (char)((charRef * radix) + digit); + } + + if (currentChar == ';' && charRef != -1) { + buffer.write(charRef); + break; + } + + fatalError("invalid Character Entitiy", this.lineNumber, this.columnNumber); + } else { + currentChar = buffer.read(); + } + } else { + crefState = ("\u0001\u000b\u0006\u00ff\u00ff\u00ff\u00ff\u00ff\u00ff\u00ff\u00ff" + +// # a m p ; & p o s ; ' +// 0 1 2 3 4 5 6 7 8 9 a + "\u0011\u00ff\u00ff\u00ff\u00ff\u00ff\u0015\u00ff\u00ff\u00ff" + +// q u o t ; " g t ; > +// b b d e f 10 11 12 13 14 + "\u00ff\u00ff\u00ff").charAt(crefState); +// l t ; +// 15 16 17 + + if (crefState == 255) fatalError("invalid Character Entitiy", this.lineNumber, this.columnNumber); + } + } + + break; + + case parseError: + // report fatal error + + fatalError(operand, this.lineNumber, this.columnNumber); + // drop through to exit parser + + case exitParser: + // exit parser + + return; + + case writeCdata: + // write character data + // this will also write any skipped whitespace + + buffer.write(currentChar); + break; // change state to operand + + case discardAndChange: + // throw saved characters away and change state + + buffer.reset(); + break; // change state to operand + + case discardSaveAndChange: + // throw saved characters away, save character and change state + + buffer.reset(); + // drop through to save character and change state + + case saveAndChange: + // save character and change state + + buffer.saveChar((char)currentChar); + break; // change state to operand + + case change: + // change state to operand + + break; // change state to operand + } + + state = operand; + } + } + catch (final IOException e) { + this.errorHandler.fatalError(new SAXParseException(e.toString(), null, null, this.lineNumber, this.columnNumber, e)); + } + finally { + this.errorHandler = this; + this.documentHandler = this.extDocumentHandler = this; + this.tags.removeAllElements(); + } + } + + public void parse(final InputSource source) throws SAXException, IOException { + if (source.getCharacterStream() != null) + parse(source.getCharacterStream()); + else if (source.getByteStream() != null) + parse(new InputStreamReader(source.getByteStream())); + else + parse(new InputStreamReader(new URL(source.getSystemId()).openStream())); + } + + public void parse(final String systemId) throws SAXException, IOException { + parse(new InputSource(systemId)); + } + + public void setLocale(final Locale locale) throws SAXException { + throw new SAXException("Not supported"); + } + + public void setEntityResolver(final EntityResolver resolver) { + // not supported + } + + public void setDTDHandler(final DTDHandler handler) { + // not supported + } + + public void setDocumentHandler(final org.xml.sax.DocumentHandler handler) { + this.documentHandler = (handler == null) ? this : handler; + this.extDocumentHandler = this; + } + + public void setDocumentHandler(final DocumentHandler handler) { + this.documentHandler = this.extDocumentHandler = (handler == null) ? this : handler; + this.documentHandler.setDocumentLocator(this); + } + + public void setErrorHandler(final ErrorHandler handler) { + this.errorHandler = (handler == null) ? this : handler; + } + + public void setDocumentLocator(final Locator locator) { + } + + public void startDocument() throws SAXException { + } + + public Writer startDocument(final Writer writer) throws SAXException { + this.documentHandler.startDocument(); + return writer; + } + + public void endDocument() throws SAXException { + } + + public void startElement(final String name, final AttributeList attributes) throws SAXException { + } + + public Writer startElement(final String name, final AttributeList attributes, final Writer writer) + throws SAXException + { + this.documentHandler.startElement(name, attributes); + return writer; + } + + public void endElement(final String name) throws SAXException { + } + + public void characters(final char ch[], final int start, final int length) throws SAXException { + } + + public void ignorableWhitespace(final char ch[], final int start, final int length) throws SAXException { + } + + public void processingInstruction(final String target, final String data) throws SAXException { + } + + public void warning(final SAXParseException e) throws SAXException { + } + + public void error(final SAXParseException e) throws SAXException { + } + + public void fatalError(final SAXParseException e) throws SAXException { + throw e; + } + + public String getPublicId() { + return ""; + } + + + public String getSystemId() { + return ""; + } + + public int getLineNumber () { + return this.lineNumber; + } + + public int getColumnNumber () { + return this.columnNumber; + } + + private void fatalError(final String msg, final int lineNumber, final int columnNumber) throws SAXException { + this.errorHandler.fatalError(new SAXParseException(msg, null, null, lineNumber, columnNumber)); + } + + private class MinMLBuffer extends Writer { + public MinMLBuffer(final Reader in) { + this.in = in; + } + + public void close() throws IOException { + flush(); + } + + public void flush() throws IOException { + try { + _flush(); + if (writer != this) writer.flush(); + } + finally { + flushed = true; + } + } + + public void write(final int c) throws IOException { + written = true; + chars[count++] = (char)c; + } + + public void write(final char[] cbuf, final int off, final int len) throws IOException { + written = true; + System.arraycopy(cbuf, off, chars, count, len); + count += len; + } + + public void saveChar(final char c) { + written = false; + chars[count++] = c; + } + + public void pushWriter(final Writer writer) { + MinML.this.tags.push(this.writer); + + this.writer = (writer == null) ? this : writer; + + flushed = written = false; + } + + public Writer getWriter() { + return writer; + } + + public void popWriter() throws IOException { + try { + if (!flushed && writer != this) writer.flush(); + } + finally { + writer = (Writer)MinML.this.tags.pop(); + flushed = written = false; + } + } + + public String getString() { + final String result = new String(chars, 0, count); + + count = 0; + return result; + } + + public void reset() { + count = 0; + } + + public int read() throws IOException { + if (nextIn == lastIn) { + if (count != 0) { + if (written) { + _flush(); + } else if (count >= (chars.length - MinML.this.bufferIncrement)) { + final char[] newChars = new char[chars.length + MinML.this.bufferIncrement]; + + System.arraycopy(chars, 0, newChars, 0, count); + chars = newChars; + } + } + + final int numRead = in.read(chars, count, chars.length - count); + + if (numRead == -1) return -1; + + nextIn = count; + lastIn = count + numRead; + } + + return chars[nextIn++]; + } + + private void _flush() throws IOException { + if (count != 0) { + try { + if (writer == this) { + try { + MinML.this.documentHandler.characters(chars, 0, count); + } + catch (final SAXException e) { + throw new IOException(e.toString()); + } + } else { + writer.write(chars, 0, count); + } + } + finally { + count = 0; + } + } + } + + private int nextIn = 0, lastIn = 0; + private char[] chars = new char[MinML.this.initialBufferSize]; + private final Reader in; + private int count = 0; + private Writer writer = this; + private boolean flushed = false; + private boolean written = false; + } + + private DocumentHandler extDocumentHandler = this; + private org.xml.sax.DocumentHandler documentHandler = this; + private ErrorHandler errorHandler = this; + private final Stack tags = new Stack(); + private int lineNumber = 1; + private int columnNumber = 0; + private final int initialBufferSize; + private final int bufferIncrement; + + private static final byte[] charClasses = { + // EOF + 13, + // \t \n \r + -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 12, -1, -1, 12, -1, -1, + // + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + // SP ! " # $ % & ' ( ) * + , - . / + 12, 8, 7, 14, 14, 14, 3, 6, 14, 14, 14, 14, 14, 11, 14, 2, + // 0 1 2 3 4 5 6 7 8 9 : ; < = > ? + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 5, 1, 4, + // + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + // [ \ ] + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 9, 14, 10 + }; + + private static final String[] operands = { + "\u0d15\u1611\u1611\u1611\u1611\u1611\u1611\u1611\u1611\u1611\u1611\u1611\u0015\u0010\u1611", + "\u1711\u1000\u0b00\u1711\u1711\u1711\u1711\u1711\u1711\u1711\u1711\u0114\u0200\u1811\u0114", + "\u1711\u1001\u0b01\u1711\u1711\u1711\u1711\u1711\u1711\u1711\u1711\u1711\u0215\u1811\u0414", + "\u1711\u1001\u0b01\u1711\u1911\u1911\u1911\u1911\u1911\u1911\u1911\u1911\u0315\u1811\u0414", + "\u1911\u1911\u1911\u1911\u1911\u0606\u1911\u1911\u1911\u1911\u1911\u0414\u0515\u1811\u0414", + "\u1911\u1911\u1911\u1911\u1911\u0606\u1911\u1911\u1911\u1911\u1911\u1911\u0515\u1811\u1911", + "\u1a11\u1a11\u1a11\u1a11\u1a11\u1a11\u0715\u0815\u1a11\u1a11\u1a11\u1a11\u0615\u1811\u1a11", + "\u0714\u0714\u0714\u070e\u0714\u0714\u0307\u0714\u0714\u0714\u0714\u0714\u0714\u1811\u0714", + "\u0814\u0814\u0814\u080e\u0814\u0814\u0814\u0307\u0814\u0814\u0814\u0814\u0814\u1811\u0814", + "\u1711\u1002\u1711\u1711\u1711\u1711\u1711\u1711\u1711\u1711\u1711\u0914\u0915\u1811\u0914", + "\u1b11\u1b11\u0904\u1b11\u1b11\u1b11\u1b11\u1b11\u1215\u1b11\u1b11\u1b11\u1b11\u1811\u0105", + "\u1711\u1012\u1711\u1711\u1711\u1711\u1711\u1711\u1711\u1711\u1711\u1711\u1711\u1811\u1711", + "\u1711\u1c11\u0912\u1711\u0e12\u1711\u1711\u1711\u1212\u1711\u1711\u1711\u1711\u1811\u0113", + "\u1711\u1c11\u0912\u1711\u0e12\u1711\u1711\u1711\u1212\u1711\u1711\u1711\u1711\u1811\u0113", + "\u0e15\u0e15\u0e15\u0e15\u0f15\u0e15\u0e15\u0e15\u0e15\u0e15\u0e15\u0e15\u0e15\u1811\u0e15", + "\u0e15\u0015\u0e15\u0e15\u0f15\u0e15\u0e15\u0e15\u0e15\u0e15\u0e15\u0e15\u0e15\u1811\u0e15", + "\u0c03\u110f\u110f\u110e\u110f\u110f\u110f\u110f\u110f\u110f\u110f\u110f\u1014\u1811\u110f", + "\u0a15\u110f\u110f\u110e\u110f\u110f\u110f\u110f\u110f\u110f\u110f\u110f\u110f\u1811\u110f", + "\u1d11\u1d11\u1d11\u1d11\u1d11\u1d11\u1d11\u1d11\u1d11\u130c\u1d11\u1408\u1d11\u1811\u1515", + "\u130f\u130f\u130f\u130f\u130f\u130f\u130f\u130f\u130f\u130f\u110d\u130f\u130f\u1811\u130f", + "\u1415\u1415\u1415\u1415\u1415\u1415\u1415\u1415\u1415\u1415\u1415\u0009\u1415\u1811\u1415", + "\u150a\u000b\u1515\u1515\u1515\u1515\u1515\u1515\u1515\u1515\u1515\u1515\u1515\u1811\u1515", + "expected Element", + "unexpected character in tag", + "unexpected end of file found", + "attribute name not followed by '='", + "invalid attribute value", + "expecting end tag", + "empty tag", + "unexpected character after <!" + }; +}