Added a check to ignore parsed elements outside the desired path step.

Project: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/commit/cf09bd9a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/tree/cf09bd9a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/diff/cf09bd9a

Branch: refs/heads/master
Commit: cf09bd9ae41285a1e3175e4360dc19461ee17ce2
Parents: 53b1c1c
Author: Preston Carman <[email protected]>
Authored: Mon May 19 17:46:51 2014 -0700
Committer: Preston Carman <[email protected]>
Committed: Mon May 19 17:46:51 2014 -0700

----------------------------------------------------------------------
 .../vxquery/xmlparser/SAXContentHandler.java    | 240 ++++++++++---------
 1 file changed, 132 insertions(+), 108 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/cf09bd9a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
----------------------------------------------------------------------
diff --git 
a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
 
b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
index 5a64ddd..58a4f03 100644
--- 
a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
+++ 
b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
@@ -96,6 +96,8 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
 
     private final ArrayBackedValueStorage resultABVS;
 
+    private boolean writeMode;
+
     private boolean[] subElement = null;
 
     private int t;
@@ -127,49 +129,54 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
         freeENBList = new ArrayList<ElementNodeBuilder>();
         pendingText = false;
         tvp = (TaggedValuePointable) 
TaggedValuePointable.FACTORY.createPointable();
+        writeMode = false;
     }
 
     @Override
     public void characters(char[] ch, int start, int length) throws 
SAXException {
-        buffer.append(ch, start, length);
-        pendingText = true;
+        if (writeMode) {
+            buffer.append(ch, start, length);
+            pendingText = true;
+        }
     }
 
     @Override
     public void endDocument() throws SAXException {
-        try {
-            flushText();
-            docb.endChildrenChunk();
-            docb.finish();
-            if (subElement == null) {
+        if (writeMode) {
+            try {
+                flushText();
+                docb.endChildrenChunk();
+                docb.finish();
                 writeElement();
+            } catch (IOException e) {
+                e.printStackTrace();
+                throw new SAXException(e);
             }
-        } catch (IOException e) {
-            e.printStackTrace();
-            throw new SAXException(e);
         }
     }
 
     @Override
     public void endElement(String uri, String localName, String name) throws 
SAXException {
-        try {
-            flushText();
-            ElementNodeBuilder enb = enbStack.remove(enbStack.size() - 1);
-            enb.endChildrenChunk();
-            endChildInParent(enb);
-
-            if (foundChildPathStep()) {
-                writeElement();
-            }
-            if (subElement != null && depth <= subElement.length) {
-                subElement[depth - 1] = false;
+        if (writeMode) {
+            try {
+                flushText();
+                ElementNodeBuilder enb = enbStack.remove(enbStack.size() - 1);
+                enb.endChildrenChunk();
+                endChildInParent(enb);
+
+                if (foundChildPathStep()) {
+                    writeElement();
+                }
+                if (subElement != null && depth <= subElement.length) {
+                    subElement[depth - 1] = false;
+                }
+                freeENB(enb);
+            } catch (IOException e) {
+                e.printStackTrace();
+                throw new SAXException(e);
             }
-            depth--;
-            freeENB(enb);
-        } catch (IOException e) {
-            e.printStackTrace();
-            throw new SAXException(e);
         }
+        depth--;
     }
 
     @Override
@@ -182,22 +189,24 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
 
     @Override
     public void processingInstruction(String target, String data) throws 
SAXException {
-        try {
-            flushText();
-            startChildInParent(pinb);
-            tempABVS.reset();
-            tempABVS.getDataOutput().writeUTF(target);
-            if (createNodeIds) {
-                pinb.setLocalNodeId(nodeIdCounter++);
+        if (writeMode) {
+            try {
+                flushText();
+                startChildInParent(pinb);
+                tempABVS.reset();
+                tempABVS.getDataOutput().writeUTF(target);
+                if (createNodeIds) {
+                    pinb.setLocalNodeId(nodeIdCounter++);
+                }
+                pinb.setTarget(tempABVS);
+                tempABVS.reset();
+                tempABVS.getDataOutput().writeUTF(data);
+                pinb.setContent(tempABVS);
+                endChildInParent(pinb);
+            } catch (IOException e) {
+                e.printStackTrace();
+                throw new SAXException(e);
             }
-            pinb.setTarget(tempABVS);
-            tempABVS.reset();
-            tempABVS.getDataOutput().writeUTF(data);
-            pinb.setContent(tempABVS);
-            endChildInParent(pinb);
-        } catch (IOException e) {
-            e.printStackTrace();
-            throw new SAXException(e);
         }
     }
 
@@ -211,15 +220,20 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
 
     @Override
     public void startDocument() throws SAXException {
+        if (subElement == null) {
+            writeMode = true;
+        }
         try {
             db.reset();
             docABVS.reset();
-            docb.reset(docABVS);
-            if (createNodeIds) {
-                docb.setLocalNodeId(nodeIdCounter++);
+            if (writeMode) {
+                docb.reset(docABVS);
+                if (createNodeIds) {
+                    docb.setLocalNodeId(nodeIdCounter++);
+                }
+                docb.startChildrenChunk();
+                flushText();
             }
-            docb.startChildrenChunk();
-            flushText();
         } catch (IOException e) {
             e.printStackTrace();
             throw new SAXException(e);
@@ -238,58 +252,65 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
             }
         }
 
-        try {
-            flushText();
-            int idx = name.indexOf(':');
-            String prefix = idx < 0 ? "" : name.substring(0, idx);
-            ElementNodeBuilder enb = createENB();
-            startChildInParent(enb, foundChildPathStep());
-            int uriCode = db.lookup(uri);
-            int localNameCode = db.lookup(localName);
-            int prefixCode = db.lookup(prefix);
-            enb.setName(uriCode, localNameCode, prefixCode);
-            if (attachTypes) {
-                int typeUriCode = db.lookup(XQueryConstants.XS_NSURI);
-                int typeLocalNameCode = 
db.lookup(BuiltinTypeQNames.UNTYPED_STR);
-                int typePrefixCode = db.lookup(XQueryConstants.XS_PREFIX);
-                enb.setType(typeUriCode, typeLocalNameCode, typePrefixCode);
-            }
-            if (createNodeIds) {
-                enb.setLocalNodeId(nodeIdCounter++);
-            }
-            enb.startAttributeChunk();
-            final int nAttrs = atts.getLength();
-            for (int i = 0; i < nAttrs; ++i) {
-                String aName = atts.getQName(i);
-                int aIdx = aName.indexOf(':');
-                int aPrefixCode = db.lookup(aIdx < 0 ? "" : aName.substring(0, 
aIdx));
-                int aLocalNameCode = db.lookup(atts.getLocalName(i));
-                int aUriCode = db.lookup(atts.getURI(i));
-                String aValue = atts.getValue(i);
-                tempABVS.reset();
-                DataOutput tempOut = tempABVS.getDataOutput();
-                tempOut.write(ValueTag.XS_UNTYPED_ATOMIC_TAG);
-                tempOut.writeUTF(aValue);
-                enb.startAttribute(anb);
-                anb.setName(aUriCode, aLocalNameCode, aPrefixCode);
+        boolean start = foundChildPathStep();
+        if (start) {
+            writeMode = true;
+        }
+
+        if (writeMode) {
+            try {
+                flushText();
+                int idx = name.indexOf(':');
+                String prefix = idx < 0 ? "" : name.substring(0, idx);
+                ElementNodeBuilder enb = createENB();
+                startChildInParent(enb, start);
+                int uriCode = db.lookup(uri);
+                int localNameCode = db.lookup(localName);
+                int prefixCode = db.lookup(prefix);
+                enb.setName(uriCode, localNameCode, prefixCode);
                 if (attachTypes) {
                     int typeUriCode = db.lookup(XQueryConstants.XS_NSURI);
-                    int typeLocalNameCode = 
db.lookup(BuiltinTypeQNames.UNTYPED_ATOMIC_STR);
+                    int typeLocalNameCode = 
db.lookup(BuiltinTypeQNames.UNTYPED_STR);
                     int typePrefixCode = db.lookup(XQueryConstants.XS_PREFIX);
-                    anb.setType(typeUriCode, typeLocalNameCode, 
typePrefixCode);
+                    enb.setType(typeUriCode, typeLocalNameCode, 
typePrefixCode);
                 }
                 if (createNodeIds) {
-                    anb.setLocalNodeId(nodeIdCounter++);
+                    enb.setLocalNodeId(nodeIdCounter++);
                 }
-                anb.setValue(tempABVS);
-                enb.endAttribute(anb);
+                enb.startAttributeChunk();
+                final int nAttrs = atts.getLength();
+                for (int i = 0; i < nAttrs; ++i) {
+                    String aName = atts.getQName(i);
+                    int aIdx = aName.indexOf(':');
+                    int aPrefixCode = db.lookup(aIdx < 0 ? "" : 
aName.substring(0, aIdx));
+                    int aLocalNameCode = db.lookup(atts.getLocalName(i));
+                    int aUriCode = db.lookup(atts.getURI(i));
+                    String aValue = atts.getValue(i);
+                    tempABVS.reset();
+                    DataOutput tempOut = tempABVS.getDataOutput();
+                    tempOut.write(ValueTag.XS_UNTYPED_ATOMIC_TAG);
+                    tempOut.writeUTF(aValue);
+                    enb.startAttribute(anb);
+                    anb.setName(aUriCode, aLocalNameCode, aPrefixCode);
+                    if (attachTypes) {
+                        int typeUriCode = db.lookup(XQueryConstants.XS_NSURI);
+                        int typeLocalNameCode = 
db.lookup(BuiltinTypeQNames.UNTYPED_ATOMIC_STR);
+                        int typePrefixCode = 
db.lookup(XQueryConstants.XS_PREFIX);
+                        anb.setType(typeUriCode, typeLocalNameCode, 
typePrefixCode);
+                    }
+                    if (createNodeIds) {
+                        anb.setLocalNodeId(nodeIdCounter++);
+                    }
+                    anb.setValue(tempABVS);
+                    enb.endAttribute(anb);
+                }
+                enb.endAttributeChunk();
+                enb.startChildrenChunk();
+                enbStack.add(enb);
+            } catch (IOException e) {
+                e.printStackTrace();
+                throw new SAXException(e);
             }
-            enb.endAttributeChunk();
-            enb.startChildrenChunk();
-            enbStack.add(enb);
-        } catch (IOException e) {
-            e.printStackTrace();
-            throw new SAXException(e);
         }
     }
 
@@ -299,21 +320,23 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
 
     @Override
     public void comment(char[] ch, int start, int length) throws SAXException {
-        try {
-            flushText();
-            startChildInParent(cnb);
-            buffer.append(ch, start, length);
-            tempABVS.reset();
-            tempABVS.getDataOutput().writeUTF(buffer.toString());
-            if (createNodeIds) {
-                cnb.setLocalNodeId(nodeIdCounter++);
+        if (writeMode) {
+            try {
+                flushText();
+                startChildInParent(cnb);
+                buffer.append(ch, start, length);
+                tempABVS.reset();
+                tempABVS.getDataOutput().writeUTF(buffer.toString());
+                if (createNodeIds) {
+                    cnb.setLocalNodeId(nodeIdCounter++);
+                }
+                cnb.setValue(tempABVS);
+                endChildInParent(cnb);
+                buffer.delete(0, buffer.length());
+            } catch (IOException e) {
+                e.printStackTrace();
+                throw new SAXException(e);
             }
-            cnb.setValue(tempABVS);
-            endChildInParent(cnb);
-            buffer.delete(0, buffer.length());
-        } catch (IOException e) {
-            e.printStackTrace();
-            throw new SAXException(e);
         }
     }
 
@@ -407,6 +430,7 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
         }
         tvp.set(resultABVS.getByteArray(), resultABVS.getStartOffset(), 
resultABVS.getLength());
         addNodeToTuple(tvp, t);
+        writeMode = false;
     }
 
     public void writeDocument(ArrayBackedValueStorage abvs) throws IOException 
{
@@ -447,11 +471,11 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
     }
 
     private void startChildInParent(AbstractNodeBuilder anb, boolean track) 
throws IOException {
-        if (enbStack.isEmpty()) {
-            docb.startChild(anb);
-        } else if (track) {
+        if (track) {
             elementABVS.reset();
             anb.reset(elementABVS);
+        } else if (enbStack.isEmpty()) {
+            docb.startChild(anb);
         } else {
             peekENBStackTop().startChild(anb);
         }

Reply via email to