Hi,

I have been working on an XMPP client based on the building blocks of the
vysper project
and noticed the following:
When the client receives a large stanza response from the server, some of
the XML tokens
are not parsed properly. Specifically, tag names get broken (e.g. instead
of "<item ...>" I see "<tem ...>").

I believe the reason is that the XMLTokenizer does not keep state between
two different IoBuffers
that the XMLParser is parsing during the stanza decoding process. It
currently keeps the last position
in the tokenized IoBuffer, but when a new IoBuffer is tokenized, the
previous partial token content
is lost.
The proposed patch fixes that by buffering the characters of the token
in progress, rather than tracking the last position in the currently
tokenized IoBuffer.

Regards,
Eilon
### Eclipse Workspace Patch 1.0
#P vysper
Index: nbxml/src/main/java/org/apache/vysper/xml/sax/impl/XMLParser.java
===================================================================
--- nbxml/src/main/java/org/apache/vysper/xml/sax/impl/XMLParser.java   
(revision 1068080)
+++ nbxml/src/main/java/org/apache/vysper/xml/sax/impl/XMLParser.java   
(working copy)
@@ -335,6 +335,7 @@
         elements.clear();
         nsResolver = new ParserNamespaceResolver();
         sentStartDocument = false;
+        tokenizer.restart();
     }
 
     private void xmlDeclaration() {
Index: nbxml/src/main/java/org/apache/vysper/xml/sax/impl/XMLTokenizer.java
===================================================================
--- nbxml/src/main/java/org/apache/vysper/xml/sax/impl/XMLTokenizer.java        
(revision 1068080)
+++ nbxml/src/main/java/org/apache/vysper/xml/sax/impl/XMLTokenizer.java        
(working copy)
@@ -38,7 +38,7 @@
         START, IN_TAG, IN_STRING, IN_DOUBLE_ATTRIBUTE_VALUE, 
IN_SINGLE_ATTRIBUTE_VALUE, IN_TEXT, CLOSED
     }
 
-    private int lastPosition = 0;
+    private final IoBuffer buffer = IoBuffer.allocate(16).setAutoExpand(true);
 
     private State state = State.START;
 
@@ -59,8 +59,6 @@
      * @throws Exception
      */
     public void parse(IoBuffer byteBuffer, CharsetDecoder decoder) throws 
SAXException {
-        lastPosition = byteBuffer.position();
-
         while (byteBuffer.hasRemaining() && state != State.CLOSED) {
             char c = (char) byteBuffer.get();
 
@@ -70,6 +68,7 @@
                     state = State.IN_TAG;
                 } else {
                     state = State.IN_TEXT;
+                    buffer.put((byte) c);
                 }
             } else if (state == State.IN_TEXT) {
                 if (c == '<') {
@@ -92,9 +91,10 @@
                 } else if (isControlChar(c)) {
                     emit(c, byteBuffer);
                 } else if (Character.isWhitespace(c)) {
-                    lastPosition = byteBuffer.position();
+                    buffer.clear();
                 } else {
                     state = State.IN_STRING;
+                    buffer.put((byte) c);
                 }
             } else if (state == State.IN_STRING) {
                 if (c == '>') {
@@ -109,54 +109,53 @@
                     emit(byteBuffer, CharsetUtil.UTF8_DECODER);
                     state = State.IN_TAG;
                 } else {
-                    // do nothing
+                    buffer.put((byte) c);
                 }
             } else if (state == State.IN_DOUBLE_ATTRIBUTE_VALUE) {
                 if (c == '"') {
                     emit(byteBuffer, decoder);
                     emit(c, byteBuffer);
                     state = State.IN_TAG;
+                } else {
+                    buffer.put((byte) c);
                 }
             } else if (state == State.IN_SINGLE_ATTRIBUTE_VALUE) {
                 if (c == '\'') {
                     emit(byteBuffer, decoder);
                     emit(c, byteBuffer);
                     state = State.IN_TAG;
+                } else {
+                    buffer.put((byte) c);
                 }
             }
         }
-
-        byteBuffer.position(lastPosition);
     }
 
     public void close() {
         state = State.CLOSED;
+        buffer.clear();
     }
 
+    public void restart() {
+        state = State.START;
+        buffer.clear();
+    }
+
     private boolean isControlChar(char c) {
         return c == '<' || c == '>' || c == '!' || c == '/' || c == '?' || c 
== '=';
     }
 
     private void emit(char token, IoBuffer byteBuffer) throws SAXException {
         listener.token(token, null);
-
-        lastPosition = byteBuffer.position();
     }
 
     private void emit(IoBuffer byteBuffer, CharsetDecoder decoder) throws 
SAXException {
-        int endPosition = byteBuffer.position();
-        int oldLimit = byteBuffer.limit();
-        byteBuffer.position(lastPosition);
-        byteBuffer.limit(endPosition - 1);
-
         try {
-            listener.token(NO_CHAR, byteBuffer.getString(decoder));
+            buffer.flip();
+            listener.token(NO_CHAR, buffer.getString(decoder));
+            buffer.clear();
         } catch (CharacterCodingException e) {
             throw new SAXException(e);
         }
-        byteBuffer.limit(oldLimit);
-        byteBuffer.position(endPosition);
-        lastPosition = byteBuffer.position();
-
     }
 }

Reply via email to