Hi, I have been working on an XMPP client based on the building blocks of the Vysper project and noticed the following: when the client receives a large stanza response from the server, some of the XML tokens are not parsed properly. Specifically, tag names get broken (e.g. instead of "<item ...>" I see "<tem ...>").
I believe the reason is that the XMLTokenizer does not keep state between two different IoBuffers that the XMLParser is parsing during the stanza decoding process. It currently keeps the last position in the tokenized IoBuffer, but when a new IoBuffer is tokenized, the previous partial token content is lost. The proposed patch fixes that by buffering the characters of the in-progress token, rather than keeping the last position in the IoBuffer currently being tokenized. Regards, Eilon
### Eclipse Workspace Patch 1.0 #P vysper Index: nbxml/src/main/java/org/apache/vysper/xml/sax/impl/XMLParser.java =================================================================== --- nbxml/src/main/java/org/apache/vysper/xml/sax/impl/XMLParser.java (revision 1068080) +++ nbxml/src/main/java/org/apache/vysper/xml/sax/impl/XMLParser.java (working copy) @@ -335,6 +335,7 @@ elements.clear(); nsResolver = new ParserNamespaceResolver(); sentStartDocument = false; + tokenizer.restart(); } private void xmlDeclaration() { Index: nbxml/src/main/java/org/apache/vysper/xml/sax/impl/XMLTokenizer.java =================================================================== --- nbxml/src/main/java/org/apache/vysper/xml/sax/impl/XMLTokenizer.java (revision 1068080) +++ nbxml/src/main/java/org/apache/vysper/xml/sax/impl/XMLTokenizer.java (working copy) @@ -38,7 +38,7 @@ START, IN_TAG, IN_STRING, IN_DOUBLE_ATTRIBUTE_VALUE, IN_SINGLE_ATTRIBUTE_VALUE, IN_TEXT, CLOSED } - private int lastPosition = 0; + private final IoBuffer buffer = IoBuffer.allocate(16).setAutoExpand(true); private State state = State.START; @@ -59,8 +59,6 @@ * @throws Exception */ public void parse(IoBuffer byteBuffer, CharsetDecoder decoder) throws SAXException { - lastPosition = byteBuffer.position(); - while (byteBuffer.hasRemaining() && state != State.CLOSED) { char c = (char) byteBuffer.get(); @@ -70,6 +68,7 @@ state = State.IN_TAG; } else { state = State.IN_TEXT; + buffer.put((byte) c); } } else if (state == State.IN_TEXT) { if (c == '<') { @@ -92,9 +91,10 @@ } else if (isControlChar(c)) { emit(c, byteBuffer); } else if (Character.isWhitespace(c)) { - lastPosition = byteBuffer.position(); + buffer.clear(); } else { state = State.IN_STRING; + buffer.put((byte) c); } } else if (state == State.IN_STRING) { if (c == '>') { @@ -109,54 +109,53 @@ emit(byteBuffer, CharsetUtil.UTF8_DECODER); state = State.IN_TAG; } else { - // do nothing + buffer.put((byte) c); } } else if (state == State.IN_DOUBLE_ATTRIBUTE_VALUE) { 
if (c == '"') { emit(byteBuffer, decoder); emit(c, byteBuffer); state = State.IN_TAG; + } else { + buffer.put((byte) c); } } else if (state == State.IN_SINGLE_ATTRIBUTE_VALUE) { if (c == '\'') { emit(byteBuffer, decoder); emit(c, byteBuffer); state = State.IN_TAG; + } else { + buffer.put((byte) c); } } } - - byteBuffer.position(lastPosition); } public void close() { state = State.CLOSED; + buffer.clear(); } + public void restart() { + state = State.START; + buffer.clear(); + } + private boolean isControlChar(char c) { return c == '<' || c == '>' || c == '!' || c == '/' || c == '?' || c == '='; } private void emit(char token, IoBuffer byteBuffer) throws SAXException { listener.token(token, null); - - lastPosition = byteBuffer.position(); } private void emit(IoBuffer byteBuffer, CharsetDecoder decoder) throws SAXException { - int endPosition = byteBuffer.position(); - int oldLimit = byteBuffer.limit(); - byteBuffer.position(lastPosition); - byteBuffer.limit(endPosition - 1); - try { - listener.token(NO_CHAR, byteBuffer.getString(decoder)); + buffer.flip(); + listener.token(NO_CHAR, buffer.getString(decoder)); + buffer.clear(); } catch (CharacterCodingException e) { throw new SAXException(e); } - byteBuffer.limit(oldLimit); - byteBuffer.position(endPosition); - lastPosition = byteBuffer.position(); - } }