Working version with only the new code.

Project: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/commit/7336f47c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/tree/7336f47c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/diff/7336f47c

Branch: refs/heads/prestonc/parser
Commit: 7336f47ca5e7ef39e683fc4d56ca4c92e4b44091
Parents: df6772c
Author: Preston Carman <[email protected]>
Authored: Sat Feb 15 01:31:25 2014 -0800
Committer: Preston Carman <[email protected]>
Committed: Thu Feb 27 14:22:24 2014 -0800

----------------------------------------------------------------------
 .../vxquery/xmlparser/SAXContentHandler.java    | 307 ++++++-------------
 1 file changed, 90 insertions(+), 217 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/7336f47c/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
----------------------------------------------------------------------
diff --git a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
index 0ef2991..3ead13a 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
@@ -16,11 +16,8 @@ package org.apache.vxquery.xmlparser;
 
 import java.io.DataOutput;
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
 
 import org.apache.vxquery.datamodel.accessors.nodes.NodeTreePointable;
-import org.apache.vxquery.datamodel.builders.nodes.AbstractNodeBuilder;
 import org.apache.vxquery.datamodel.builders.nodes.AttributeNodeBuilder;
 import org.apache.vxquery.datamodel.builders.nodes.CommentNodeBuilder;
 import org.apache.vxquery.datamodel.builders.nodes.DictionaryBuilder;
@@ -38,12 +35,10 @@ import org.xml.sax.Locator;
 import org.xml.sax.SAXException;
 import org.xml.sax.ext.LexicalHandler;
 
-import edu.uci.ics.hyracks.data.std.primitive.BytePointable;
 import edu.uci.ics.hyracks.data.std.primitive.IntegerPointable;
 import edu.uci.ics.hyracks.data.std.util.ArrayBackedValueStorage;
 
 public class SAXContentHandler implements ContentHandler, LexicalHandler {
-    private final ArrayBackedValueStorage docABVS;
 
     private final boolean createNodeIds;
 
@@ -51,8 +46,6 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
 
     private final ITreeNodeIdProvider nodeIdProvider;
 
-    private final ArrayBackedValueStorage tempABVS;
-
     private final DocumentNodeBuilder docb;
 
     private final TextNodeBuilder tnb;
@@ -67,22 +60,20 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
 
     private final StringBuilder buffer;
 
-    private final List<ElementNodeBuilder> enbStack;
-
-    private final List<ElementNodeBuilder> freeENBList;
-
     private int nodeIdCounter;
 
     private boolean pendingText;
 
     private final ArrayBackedValueStorage leavesABVS;
 
+    private final ElementNodeBuilder enb;
+
     // Structure and data.
     private final GrowableIntArray leavesKind;
     private final GrowableIntArray leavesStart;
     private final GrowableIntArray leavesEnd;
-    private final GrowableIntArray leavesDepth;
-    private final GrowableIntArray leavesParent;
+//    private final GrowableIntArray leavesDepth;
+//    private final GrowableIntArray leavesParent;
     private final GrowableIntArray leavesAttributeCount;
     private final GrowableIntArray leavesChildrenCount;
 
@@ -105,11 +96,10 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
     private final int LEAF_POST_NODE = 7;
 
     public SAXContentHandler(boolean attachTypes, ITreeNodeIdProvider nodeIdProvider) {
-        docABVS = new ArrayBackedValueStorage();
         this.createNodeIds = nodeIdProvider != null;
         this.attachTypes = attachTypes;
         this.nodeIdProvider = nodeIdProvider;
-        this.tempABVS = new ArrayBackedValueStorage();
+        enb = new ElementNodeBuilder();
         docb = new DocumentNodeBuilder();
         tnb = new TextNodeBuilder();
         cnb = new CommentNodeBuilder();
@@ -117,23 +107,22 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
         anb = new AttributeNodeBuilder();
         db = new DictionaryBuilder();
         buffer = new StringBuilder();
-        enbStack = new ArrayList<ElementNodeBuilder>();
-        freeENBList = new ArrayList<ElementNodeBuilder>();
         pendingText = false;
 
-        leavesKind = new GrowableIntArray();
-        leavesStart = new GrowableIntArray();
+        leavesKind = new GrowableIntArray(600);
+        leavesStart = new GrowableIntArray(600);
         leavesABVS = new ArrayBackedValueStorage();
-        leavesEnd = new GrowableIntArray();
-        leavesDepth = new GrowableIntArray();
-        leavesParent = new GrowableIntArray();
-        leavesAttributeCount = new GrowableIntArray();
-        leavesChildrenCount = new GrowableIntArray();
-        previousLeaf = new GrowableIntArray();
-        childStartOffset = new GrowableIntArray();
-        childSlotOffset = new GrowableIntArray();
+        leavesEnd = new GrowableIntArray(600);
+//        leavesDepth = new GrowableIntArray(600);
+//        leavesParent = new GrowableIntArray(600);
+        leavesAttributeCount = new GrowableIntArray(600);
+        leavesChildrenCount = new GrowableIntArray(600);
+        previousLeaf = new GrowableIntArray(600);
+        childStartOffset = new GrowableIntArray(600);
+        childSlotOffset = new GrowableIntArray(600);
         textCount = 0;
         textCurrentDepth = 0;
+        childSlotCounter = 0;
 
     }
 
@@ -147,28 +136,25 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
     public void endDocument() throws SAXException {
         try {
             flushText();
-            docb.endChildrenChunk();
-            docb.finish();
+            leafNodeStart(LEAF_POST_DOCUMENT);
+            leafNodeEnd();
+            textCurrentDepth--;
         } catch (IOException e) {
             e.printStackTrace();
             throw new SAXException(e);
         }
 
-        leafNodeStart(LEAF_POST_DOCUMENT);
-        leafNodeEnd();
-
-        textCurrentDepth--;
-        int[] k = leavesKind.getArray();
-        int[] s = leavesStart.getArray();
-        int[] e = leavesEnd.getArray();
-        int[] d = leavesDepth.getArray();
-        int[] p = leavesParent.getArray();
-        int[] c = leavesChildrenCount.getArray();
-        int[] a = leavesAttributeCount.getArray();
-        for (int i = 0; i < s.length; ++i) {
-            System.err.println(i + " " + k[i] + " - " + d[i] + ":" + s[i] + ":" + e[i] + " p=" + p[i] + " a=" + a[i]
-                    + " c=" + c[i]);
-        }
+        //        int[] k = leavesKind.getArray();
+        //        int[] s = leavesStart.getArray();
+        //        int[] e = leavesEnd.getArray();
+        //        int[] d = leavesDepth.getArray();
+        //        int[] p = leavesParent.getArray();
+        //        int[] c = leavesChildrenCount.getArray();
+        //        int[] a = leavesAttributeCount.getArray();
+        //        for (int i = 0; i < s.length; ++i) {
+        //            System.err.println(i + " " + k[i] + " - " + d[i] + ":" + s[i] + ":" + e[i] + " p=" + p[i] + " a=" + a[i]
+        //                    + " c=" + c[i]);
+        //        }
 
     }
 
@@ -176,20 +162,13 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
     public void endElement(String uri, String localName, String name) throws SAXException {
         try {
             flushText();
-            ElementNodeBuilder enb = enbStack.remove(enbStack.size() - 1);
-            enb.endChildrenChunk();
-            endChildInParent(enb);
-            freeENB(enb);
+            leafNodeStart(LEAF_POST_NODE);
+            leafNodeEnd();
+            textCurrentDepth--;
         } catch (IOException e) {
             e.printStackTrace();
             throw new SAXException(e);
         }
-
-        leafNodeStart(LEAF_POST_NODE);
-        leafNodeEnd();
-
-        textCurrentDepth--;
-
     }
 
     @Override
@@ -204,28 +183,15 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
     public void processingInstruction(String target, String data) throws SAXException {
         try {
             flushText();
-            startChildInParent(pinb);
-            tempABVS.reset();
-            tempABVS.getDataOutput().writeUTF(target);
-            if (createNodeIds) {
-                pinb.setLocalNodeId(nodeIdCounter++);
-            }
-            pinb.setTarget(tempABVS);
-            tempABVS.reset();
-            tempABVS.getDataOutput().writeUTF(data);
-            pinb.setContent(tempABVS);
-            endChildInParent(pinb);
-
             // Save to leavesABVS
             leafNodeStart(LEAF_PI);
             pinb.reset(leavesABVS);
             if (createNodeIds) {
-                pinb.setLocalNodeId(nodeIdCounter);
+                pinb.setLocalNodeId(nodeIdCounter++);
             }
             pinb.setTarget(target);
             pinb.setContent(data);
             leafNodeEnd();
-
         } catch (IOException e) {
             e.printStackTrace();
             throw new SAXException(e);
@@ -242,22 +208,30 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
 
     @Override
     public void startDocument() throws SAXException {
+        leavesKind.clear();
+        leavesStart.clear();
+        leavesABVS.reset();
+        leavesEnd.clear();
+//        leavesDepth.clear();
+//        leavesParent.clear();
+        leavesAttributeCount.clear();
+        leavesChildrenCount.clear();
+        previousLeaf.clear();
+        childStartOffset.clear();
+        childSlotOffset.clear();
+        textCount = 0;
+        textCurrentDepth = 0;
+        childSlotCounter = 0;
+
         textCurrentDepth++;
         try {
             nodeIdCounter = 0;
             db.reset();
-            docABVS.reset();
-            docb.reset(docABVS);
-            if (createNodeIds) {
-                docb.setLocalNodeId(nodeIdCounter++);
-            }
-            docb.startChildrenChunk();
 
             leafNodeStart(LEAF_PRE_DOCUMENT);
-            DocumentNodeBuilder docb2 = new DocumentNodeBuilder();
-            docb2.reset(leavesABVS);
+            docb.reset(leavesABVS);
             if (createNodeIds) {
-                docb2.setLocalNodeId(nodeIdCounter);
+                docb.setLocalNodeId(nodeIdCounter++);
             }
             leafNodeEnd();
 
@@ -275,40 +249,26 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
             flushText();
             int idx = name.indexOf(':');
             String prefix = idx < 0 ? "" : name.substring(0, idx);
-            ElementNodeBuilder enb = createENB();
-            startChildInParent(enb);
             int uriCode = db.lookup(uri);
             int localNameCode = db.lookup(localName);
             int prefixCode = db.lookup(prefix);
-            enb.setName(uriCode, localNameCode, prefixCode);
-            if (attachTypes) {
-                int typeUriCode = db.lookup(XQueryConstants.XS_NSURI);
-                int typeLocalNameCode = db.lookup(BuiltinTypeQNames.UNTYPED_STR);
-                int typePrefixCode = db.lookup(XQueryConstants.XS_PREFIX);
-                enb.setType(typeUriCode, typeLocalNameCode, typePrefixCode);
-            }
-            if (createNodeIds) {
-                enb.setLocalNodeId(nodeIdCounter++);
-            }
 
             // Save to leavesABVS
             leafNodeStart(LEAF_PRE_NODE);
-            ElementNodeBuilder enb2 = createENB();
-            enb2.setMvs(leavesABVS);
-            enb2.setName(uriCode, localNameCode, prefixCode);
+            enb.setMvs(leavesABVS);
+            enb.setName(uriCode, localNameCode, prefixCode);
             if (attachTypes) {
                 int typeUriCode = db.lookup(XQueryConstants.XS_NSURI);
                 int typeLocalNameCode = db.lookup(BuiltinTypeQNames.UNTYPED_STR);
                 int typePrefixCode = db.lookup(XQueryConstants.XS_PREFIX);
-                enb2.setType(typeUriCode, typeLocalNameCode, typePrefixCode);
+                enb.setType(typeUriCode, typeLocalNameCode, typePrefixCode);
             }
             if (createNodeIds) {
-                enb2.setLocalNodeId(nodeIdCounter);
+                enb.setLocalNodeId(nodeIdCounter++);
             }
             leafNodeEnd();
             textCurrentDepth++;
 
-            enb.startAttributeChunk();
             final int nAttrs = atts.getLength();
             for (int i = 0; i < nAttrs; ++i) {
                 String aName = atts.getQName(i);
@@ -316,24 +276,6 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
                 int aPrefixCode = db.lookup(aIdx < 0 ? "" : aName.substring(0, aIdx));
                 int aLocalNameCode = db.lookup(atts.getLocalName(i));
                 int aUriCode = db.lookup(atts.getURI(i));
-                String aValue = atts.getValue(i);
-                tempABVS.reset();
-                DataOutput tempOut = tempABVS.getDataOutput();
-                tempOut.write(ValueTag.XS_UNTYPED_ATOMIC_TAG);
-                tempOut.writeUTF(aValue);
-                enb.startAttribute(anb);
-                anb.setName(aUriCode, aLocalNameCode, aPrefixCode);
-                if (attachTypes) {
-                    int typeUriCode = db.lookup(XQueryConstants.XS_NSURI);
-                    int typeLocalNameCode = db.lookup(BuiltinTypeQNames.UNTYPED_ATOMIC_STR);
-                    int typePrefixCode = db.lookup(XQueryConstants.XS_PREFIX);
-                    anb.setType(typeUriCode, typeLocalNameCode, typePrefixCode);
-                }
-                if (createNodeIds) {
-                    anb.setLocalNodeId(nodeIdCounter++);
-                }
-                anb.setValue(tempABVS);
-                enb.endAttribute(anb);
 
                 // Save to leavesABVS
                 leafNodeStart(LEAF_ATTRIBUTE);
@@ -350,11 +292,7 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
                 }
                 anb.setValue(atts.getValue(i));
                 leafNodeEnd();
-
             }
-            enb.endAttributeChunk();
-            enb.startChildrenChunk();
-            enbStack.add(enb);
         } catch (IOException e) {
             e.printStackTrace();
             throw new SAXException(e);
@@ -369,21 +307,13 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
     public void comment(char[] ch, int start, int length) throws SAXException {
         try {
             flushText();
-            startChildInParent(cnb);
             buffer.append(ch, start, length);
-            tempABVS.reset();
-            tempABVS.getDataOutput().writeUTF(buffer.toString());
-            if (createNodeIds) {
-                cnb.setLocalNodeId(nodeIdCounter++);
-            }
-            cnb.setValue(tempABVS);
-            endChildInParent(cnb);
 
             // Save to leavesABVS
             leafNodeStart(LEAF_COMMENT);
             cnb.reset(leavesABVS);
             if (createNodeIds) {
-                cnb.setLocalNodeId(nodeIdCounter);
+                cnb.setLocalNodeId(nodeIdCounter++);
             }
             cnb.setValue(buffer.toString());
             leafNodeEnd();
@@ -397,20 +327,11 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
 
     private void flushText() throws IOException {
         if (pendingText) {
-            peekENBStackTop().startChild(tnb);
-            tempABVS.reset();
-            tempABVS.getDataOutput().writeUTF(buffer.toString());
-            if (createNodeIds) {
-                tnb.setLocalNodeId(nodeIdCounter++);
-            }
-            tnb.setValue(tempABVS);
-            peekENBStackTop().endChild(tnb);
-
             // Save to leavesABVS
             leafNodeStart(LEAF_TEXT);
             tnb.reset(leavesABVS);
             if (createNodeIds) {
-                tnb.setLocalNodeId(nodeIdCounter);
+                tnb.setLocalNodeId(nodeIdCounter++);
             }
             tnb.setValue(buffer.toString());
             leafNodeEnd();
@@ -459,24 +380,6 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
             out.writeInt(nodeIdProvider.getId());
         }
         db.write(abvs);
-        out.write(docABVS.getByteArray(), docABVS.getStartOffset(), docABVS.getLength());
-    }
-
-    public void writeOnce(ArrayBackedValueStorage abvs) throws IOException {
-        DataOutput out = abvs.getDataOutput();
-        out.write(ValueTag.NODE_TREE_TAG);
-        byte header = NodeTreePointable.HEADER_DICTIONARY_EXISTS_MASK;
-        if (attachTypes) {
-            header |= NodeTreePointable.HEADER_TYPE_EXISTS_MASK;
-        }
-        if (createNodeIds) {
-            header |= NodeTreePointable.HEADER_NODEID_EXISTS_MASK;
-        }
-        out.write(header);
-        if (createNodeIds) {
-            out.writeInt(nodeIdProvider.getId());
-        }
-        db.write(abvs);
 
         for (int i = 0; i < leavesKind.getSize(); ++i) {
             if (leavesKind.getArray()[i] == LEAF_PRE_DOCUMENT) {
@@ -485,12 +388,7 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
 
                 int children = leavesChildrenCount.getArray()[i];
                 if (children > 0) {
-                    out.writeInt(children);
-                    int offset = abvs.getLength();
-                    for (int s = 0; s < children; ++s) {
-                        out.writeInt(-1);
-                        addChildSlot(offset, s, children);
-                    }
+                    sequenceSlotStub(abvs, children);
                 }
                 // Continue with nodes.
 
@@ -499,103 +397,78 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler {
                 int attrCount = leavesAttributeCount.getArray()[i];
                 int childrenCount = leavesChildrenCount.getArray()[i];
 
-                ElementNodeBuilder enb2 = createENB();
-                enb2.setMvs(abvs);
-                enb2.setTagHeader(nsCount, attrCount, childrenCount);
+                enb.setMvs(abvs);
+                enb.setTagHeader(nsCount, attrCount, childrenCount);
 
                 out.write(leavesABVS.getByteArray(), leavesStart.getArray()[i],
                         leavesEnd.getArray()[i] - leavesStart.getArray()[i]);
 
                 if (attrCount > 0) {
-                    out.writeInt(attrCount);
-                    int offset = abvs.getLength();
-                    for (int s = 0; s < attrCount; ++s) {
-                        out.writeInt(-1);
-                        addChildSlot(offset, s, attrCount);
-                    }
+                    sequenceSlotStub(abvs, attrCount);
                     for (int s = 0; s < attrCount; ++s) {
                         ++i;
                         out.write(leavesABVS.getByteArray(), leavesStart.getArray()[i], leavesEnd.getArray()[i]
                                 - leavesStart.getArray()[i]);
+                        updateSequenceSlot(abvs);
                     }
                 }
 
                 if (childrenCount > 0) {
-                    out.writeInt(childrenCount);
-                    int offset = abvs.getLength();
-                    for (int s = 0; s < childrenCount; ++s) {
-                        out.writeInt(-1);
-                        addChildSlot(offset, s, childrenCount);
-                    }
+                    sequenceSlotStub(abvs, childrenCount);
                 }
                 // Continue with nodes.
 
+            } else if (leavesKind.getArray()[i] == LEAF_POST_DOCUMENT) {
+                // no action
+            } else if (leavesKind.getArray()[i] == LEAF_POST_NODE) {
+                updateSequenceSlot(abvs);
             } else {
-                if (leavesKind.getArray()[i] != LEAF_POST_DOCUMENT && leavesKind.getArray()[i] != LEAF_POST_NODE)
                 out.write(leavesABVS.getByteArray(), leavesStart.getArray()[i],
                         leavesEnd.getArray()[i] - leavesStart.getArray()[i]);
-                finishChildSlot(abvs);
+                updateSequenceSlot(abvs);
             }
         }
     }
 
-    private void addChildSlot(int offset, int count, int total) {
+    private void sequenceSlotStub(ArrayBackedValueStorage abvs, int count) throws IOException {
+        DataOutput out = abvs.getDataOutput();
+        out.writeInt(count);
+        int offset = abvs.getLength();
+        for (int s = 0; s < count; ++s) {
+            out.writeInt(-1);
+            addSequenceSlot(offset, s, count);
+        }
+        //
+        //        for (int i = 0; i < childSlotCounter; ++i) {
+        //            System.err.println("\t\t" + i + " " + childStartOffset.getArray()[i] + " - "
+        //                    + childSlotOffset.getArray()[i]);
+        //        }
+    }
+
+    private void addSequenceSlot(int offset, int count, int total) {
         childStartOffset.insert(childSlotCounter, offset + total * SLOT_SIZE);
         childSlotOffset.insert(childSlotCounter, offset + (total - count - 1) * SLOT_SIZE);
         childSlotCounter++;
-
-        for (int i = 0; i < childSlotCounter; ++i) {
-            System.err.println("\t\t" + i + " " + childStartOffset.getArray()[i] + " - "
-                    + childSlotOffset.getArray()[i]);
-        }
     }
 
-    private void finishChildSlot(ArrayBackedValueStorage abvs) {
+    private void updateSequenceSlot(ArrayBackedValueStorage abvs) {
+        //        for (int i = 0; i < childSlotCounter; ++i) {
+        //            System.err.println("\t" + i + " " + childStartOffset.getArray()[i] + " - " + childSlotOffset.getArray()[i]);
+        //        }
         childSlotCounter--;
         int length = abvs.getLength() - childStartOffset.getArray()[childSlotCounter];
         IntegerPointable.setInteger(abvs.getByteArray(), childSlotOffset.getArray()[childSlotCounter], length);
     }
 
-    private ElementNodeBuilder createENB() {
-        if (freeENBList.isEmpty()) {
-            return new ElementNodeBuilder();
-        }
-        return freeENBList.remove(freeENBList.size() - 1);
-    }
-
-    private void freeENB(ElementNodeBuilder enb) {
-        freeENBList.add(enb);
-    }
-
-    private ElementNodeBuilder peekENBStackTop() {
-        return enbStack.get(enbStack.size() - 1);
-    }
-
-    private void startChildInParent(AbstractNodeBuilder anb) throws IOException {
-        if (enbStack.isEmpty()) {
-            docb.startChild(anb);
-        } else {
-            peekENBStackTop().startChild(anb);
-        }
-    }
-
-    private void endChildInParent(AbstractNodeBuilder anb) throws IOException {
-        if (enbStack.isEmpty()) {
-            docb.endChild(anb);
-        } else {
-            peekENBStackTop().endChild(anb);
-        }
-    }
-
     private void leafNodeStart(int kind) {
         leavesKind.append(kind);
         leavesStart.append(leavesABVS.getLength());
-        leavesDepth.append(textCurrentDepth);
+//        leavesDepth.append(textCurrentDepth);
         leavesAttributeCount.append(0);
         leavesChildrenCount.append(0);
 
         int parent = previousLeaf.getArray()[textCurrentDepth - 1];
-        leavesParent.append(parent);
+//        leavesParent.append(parent);
         if (kind == LEAF_POST_NODE || kind == LEAF_POST_DOCUMENT) {
             // Skip Count
         } else if (kind == LEAF_ATTRIBUTE) {

Reply via email to