Test code for the SAX content handler. The output byte array is correct, but the 
code still needs more testing and cleanup. Checkpoint commit.


Project: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/commit/df6772cf
Tree: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/tree/df6772cf
Diff: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/diff/df6772cf

Branch: refs/heads/prestonc/parser
Commit: df6772cf02a41a7d87c012c6e37775f4dce6158b
Parents: a76d647
Author: Preston Carman <[email protected]>
Authored: Fri Feb 14 23:25:05 2014 -0800
Committer: Preston Carman <[email protected]>
Committed: Thu Feb 27 14:22:24 2014 -0800

----------------------------------------------------------------------
 .../vxquery/xmlparser/SAXContentHandler.java    | 270 +++++++++++++++++++
 1 file changed, 270 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/df6772cf/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
----------------------------------------------------------------------
diff --git 
a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
 
b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
index a8ec0b9..0ef2991 100644
--- 
a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
+++ 
b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
@@ -30,6 +30,7 @@ import 
org.apache.vxquery.datamodel.builders.nodes.PINodeBuilder;
 import org.apache.vxquery.datamodel.builders.nodes.TextNodeBuilder;
 import org.apache.vxquery.datamodel.values.ValueTag;
 import org.apache.vxquery.types.BuiltinTypeQNames;
+import org.apache.vxquery.util.GrowableIntArray;
 import org.apache.vxquery.xmlquery.query.XQueryConstants;
 import org.xml.sax.Attributes;
 import org.xml.sax.ContentHandler;
@@ -37,6 +38,8 @@ import org.xml.sax.Locator;
 import org.xml.sax.SAXException;
 import org.xml.sax.ext.LexicalHandler;
 
+import edu.uci.ics.hyracks.data.std.primitive.BytePointable;
+import edu.uci.ics.hyracks.data.std.primitive.IntegerPointable;
 import edu.uci.ics.hyracks.data.std.util.ArrayBackedValueStorage;
 
 public class SAXContentHandler implements ContentHandler, LexicalHandler {
@@ -72,6 +75,35 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
 
     private boolean pendingText;
 
+    // Byte storage that every recorded leaf is serialized into, one after another.
+    private final ArrayBackedValueStorage leavesABVS;
+
+    // Structure and data: parallel arrays holding one entry per recorded leaf.
+    // LEAF_* kind passed to leafNodeStart.
+    private final GrowableIntArray leavesKind;
+    // Length of leavesABVS when the leaf was started / ended.
+    private final GrowableIntArray leavesStart;
+    private final GrowableIntArray leavesEnd;
+    // Value of textCurrentDepth when the leaf was started.
+    private final GrowableIntArray leavesDepth;
+    // Leaf index read from previousLeaf for the enclosing depth.
+    private final GrowableIntArray leavesParent;
+    // Counters that are incremented on the parent's entry as attributes/children are recorded.
+    private final GrowableIntArray leavesAttributeCount;
+    private final GrowableIntArray leavesChildrenCount;
+
+    // Data keys for current progress through XML document.
+    // Number of leaves completed so far; doubles as the index of the leaf being recorded.
+    private int textCount = 0;
+    // Raised by startDocument/startElement and lowered by endDocument/endElement.
+    private int textCurrentDepth = 0;
+    // Number of live entries in childStartOffset/childSlotOffset (used as a stack).
+    private int childSlotCounter = 0;
+    // Per depth: index of the most recent leaf that can have children.
+    private final GrowableIntArray previousLeaf;
+    private final GrowableIntArray childStartOffset;
+    private final GrowableIntArray childSlotOffset;
+    // Width in bytes of one slot; slots are emitted with DataOutput.writeInt.
+    private static final int SLOT_SIZE = 4;
+
+    // Leaf kinds. Every kind needs its own value: writeOnce and leafNodeStart branch on
+    // them, and LEAF_PI previously shared the value 5 with LEAF_PRE_DOCUMENT, which made
+    // processing instructions indistinguishable from the start of the document.
+    private static final int LEAF_TEXT = 1;
+    private static final int LEAF_PRE_NODE = 2;
+    private static final int LEAF_COMMENT = 3;
+    private static final int LEAF_ATTRIBUTE = 4;
+    private static final int LEAF_PI = 5;
+    private static final int LEAF_PRE_DOCUMENT = 6;
+    private static final int LEAF_POST_DOCUMENT = 7;
+    private static final int LEAF_POST_NODE = 8;
+
     public SAXContentHandler(boolean attachTypes, ITreeNodeIdProvider 
nodeIdProvider) {
         docABVS = new ArrayBackedValueStorage();
         this.createNodeIds = nodeIdProvider != null;
@@ -88,6 +120,21 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
         enbStack = new ArrayList<ElementNodeBuilder>();
         freeENBList = new ArrayList<ElementNodeBuilder>();
         pendingText = false;
+
+        leavesKind = new GrowableIntArray();
+        leavesStart = new GrowableIntArray();
+        leavesABVS = new ArrayBackedValueStorage();
+        leavesEnd = new GrowableIntArray();
+        leavesDepth = new GrowableIntArray();
+        leavesParent = new GrowableIntArray();
+        leavesAttributeCount = new GrowableIntArray();
+        leavesChildrenCount = new GrowableIntArray();
+        previousLeaf = new GrowableIntArray();
+        childStartOffset = new GrowableIntArray();
+        childSlotOffset = new GrowableIntArray();
+        textCount = 0;
+        textCurrentDepth = 0;
+
     }
 
     @Override
@@ -106,6 +153,23 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
             e.printStackTrace();
             throw new SAXException(e);
         }
+
+        leafNodeStart(LEAF_POST_DOCUMENT);
+        leafNodeEnd();
+
+        textCurrentDepth--;
+        int[] k = leavesKind.getArray();
+        int[] s = leavesStart.getArray();
+        int[] e = leavesEnd.getArray();
+        int[] d = leavesDepth.getArray();
+        int[] p = leavesParent.getArray();
+        int[] c = leavesChildrenCount.getArray();
+        int[] a = leavesAttributeCount.getArray();
+        for (int i = 0; i < s.length; ++i) {
+            System.err.println(i + " " + k[i] + " - " + d[i] + ":" + s[i] + 
":" + e[i] + " p=" + p[i] + " a=" + a[i]
+                    + " c=" + c[i]);
+        }
+
     }
 
     @Override
@@ -120,6 +184,12 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
             e.printStackTrace();
             throw new SAXException(e);
         }
+
+        leafNodeStart(LEAF_POST_NODE);
+        leafNodeEnd();
+
+        textCurrentDepth--;
+
     }
 
     @Override
@@ -145,6 +215,17 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
             tempABVS.getDataOutput().writeUTF(data);
             pinb.setContent(tempABVS);
             endChildInParent(pinb);
+
+            // Save to leavesABVS
+            leafNodeStart(LEAF_PI);
+            pinb.reset(leavesABVS);
+            if (createNodeIds) {
+                pinb.setLocalNodeId(nodeIdCounter);
+            }
+            pinb.setTarget(target);
+            pinb.setContent(data);
+            leafNodeEnd();
+
         } catch (IOException e) {
             e.printStackTrace();
             throw new SAXException(e);
@@ -161,7 +242,9 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
 
     @Override
     public void startDocument() throws SAXException {
+        textCurrentDepth++;
         try {
+            nodeIdCounter = 0;
             db.reset();
             docABVS.reset();
             docb.reset(docABVS);
@@ -169,6 +252,15 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
                 docb.setLocalNodeId(nodeIdCounter++);
             }
             docb.startChildrenChunk();
+
+            leafNodeStart(LEAF_PRE_DOCUMENT);
+            DocumentNodeBuilder docb2 = new DocumentNodeBuilder();
+            docb2.reset(leavesABVS);
+            if (createNodeIds) {
+                docb2.setLocalNodeId(nodeIdCounter);
+            }
+            leafNodeEnd();
+
             flushText();
         } catch (IOException e) {
             e.printStackTrace();
@@ -178,6 +270,7 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
 
     @Override
     public void startElement(String uri, String localName, String name, 
Attributes atts) throws SAXException {
+
         try {
             flushText();
             int idx = name.indexOf(':');
@@ -197,6 +290,24 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
             if (createNodeIds) {
                 enb.setLocalNodeId(nodeIdCounter++);
             }
+
+            // Save to leavesABVS
+            leafNodeStart(LEAF_PRE_NODE);
+            ElementNodeBuilder enb2 = createENB();
+            enb2.setMvs(leavesABVS);
+            enb2.setName(uriCode, localNameCode, prefixCode);
+            if (attachTypes) {
+                int typeUriCode = db.lookup(XQueryConstants.XS_NSURI);
+                int typeLocalNameCode = 
db.lookup(BuiltinTypeQNames.UNTYPED_STR);
+                int typePrefixCode = db.lookup(XQueryConstants.XS_PREFIX);
+                enb2.setType(typeUriCode, typeLocalNameCode, typePrefixCode);
+            }
+            if (createNodeIds) {
+                enb2.setLocalNodeId(nodeIdCounter);
+            }
+            leafNodeEnd();
+            textCurrentDepth++;
+
             enb.startAttributeChunk();
             final int nAttrs = atts.getLength();
             for (int i = 0; i < nAttrs; ++i) {
@@ -223,6 +334,23 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
                 }
                 anb.setValue(tempABVS);
                 enb.endAttribute(anb);
+
+                // Save to leavesABVS
+                leafNodeStart(LEAF_ATTRIBUTE);
+                anb.reset(leavesABVS);
+                anb.setName(aUriCode, aLocalNameCode, aPrefixCode);
+                if (attachTypes) {
+                    int typeUriCode = db.lookup(XQueryConstants.XS_NSURI);
+                    int typeLocalNameCode = 
db.lookup(BuiltinTypeQNames.UNTYPED_ATOMIC_STR);
+                    int typePrefixCode = db.lookup(XQueryConstants.XS_PREFIX);
+                    anb.setType(typeUriCode, typeLocalNameCode, 
typePrefixCode);
+                }
+                if (createNodeIds) {
+                    anb.setLocalNodeId(nodeIdCounter++);
+                }
+                anb.setValue(atts.getValue(i));
+                leafNodeEnd();
+
             }
             enb.endAttributeChunk();
             enb.startChildrenChunk();
@@ -250,6 +378,16 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
             }
             cnb.setValue(tempABVS);
             endChildInParent(cnb);
+
+            // Save to leavesABVS
+            leafNodeStart(LEAF_COMMENT);
+            cnb.reset(leavesABVS);
+            if (createNodeIds) {
+                cnb.setLocalNodeId(nodeIdCounter);
+            }
+            cnb.setValue(buffer.toString());
+            leafNodeEnd();
+
             buffer.delete(0, buffer.length());
         } catch (IOException e) {
             e.printStackTrace();
@@ -267,6 +405,16 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
             }
             tnb.setValue(tempABVS);
             peekENBStackTop().endChild(tnb);
+
+            // Save to leavesABVS
+            leafNodeStart(LEAF_TEXT);
+            tnb.reset(leavesABVS);
+            if (createNodeIds) {
+                tnb.setLocalNodeId(nodeIdCounter);
+            }
+            tnb.setValue(buffer.toString());
+            leafNodeEnd();
+
             buffer.delete(0, buffer.length());
             pendingText = false;
         }
@@ -314,6 +462,100 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
         out.write(docABVS.getByteArray(), docABVS.getStartOffset(), 
docABVS.getLength());
     }
 
+    /**
+     * Writes the recorded leaves into {@code abvs} in one pass.
+     * <p>
+     * The output starts with the node tree tag, a header byte built from the
+     * dictionary/type/node-id flags, the provider's id when node ids are enabled, and
+     * whatever {@code db.write} emits. The leaves are then replayed in recording order:
+     * document and element starts copy their bytes and reserve one placeholder slot per
+     * attribute/child, attribute leaves are copied straight after their element, and
+     * every other leaf completes the slot on top of the pending-slot stack.
+     *
+     * @param abvs storage that receives the serialized tree
+     * @throws IOException if writing to the storage's data output fails
+     */
+    public void writeOnce(ArrayBackedValueStorage abvs) throws IOException {
+        final DataOutput out = abvs.getDataOutput();
+
+        // Preamble: tag, header flags, optional node id, then the output of db.
+        out.write(ValueTag.NODE_TREE_TAG);
+        byte header = NodeTreePointable.HEADER_DICTIONARY_EXISTS_MASK;
+        if (attachTypes) {
+            header |= NodeTreePointable.HEADER_TYPE_EXISTS_MASK;
+        }
+        if (createNodeIds) {
+            header |= NodeTreePointable.HEADER_NODEID_EXISTS_MASK;
+        }
+        out.write(header);
+        if (createNodeIds) {
+            out.writeInt(nodeIdProvider.getId());
+        }
+        db.write(abvs);
+
+        int leaf = 0;
+        while (leaf < leavesKind.getSize()) {
+            final int kind = leavesKind.getArray()[leaf];
+            if (kind == LEAF_PRE_DOCUMENT) {
+                writeLeafBytes(out, leaf);
+                reserveChildSlots(out, abvs, leavesChildrenCount.getArray()[leaf]);
+                // Continue with nodes.
+            } else if (kind == LEAF_PRE_NODE) {
+                final int attributes = leavesAttributeCount.getArray()[leaf];
+                final int children = leavesChildrenCount.getArray()[leaf];
+
+                // The tag header goes through a builder bound to the output storage;
+                // the namespace count is always passed as 0 here.
+                final ElementNodeBuilder headerBuilder = createENB();
+                headerBuilder.setMvs(abvs);
+                headerBuilder.setTagHeader(0, attributes, children);
+
+                writeLeafBytes(out, leaf);
+
+                // Attribute leaves directly follow their element in the leaf arrays, so
+                // they are consumed here by advancing the leaf index.
+                reserveChildSlots(out, abvs, attributes);
+                for (int a = 0; a < attributes; ++a) {
+                    leaf++;
+                    writeLeafBytes(out, leaf);
+                }
+
+                reserveChildSlots(out, abvs, children);
+                // Continue with nodes.
+            } else {
+                final boolean closing = kind == LEAF_POST_DOCUMENT || kind == LEAF_POST_NODE;
+                if (!closing) {
+                    writeLeafBytes(out, leaf);
+                }
+                // NOTE(review): the slot is finished for closing leaves too, including
+                // the end of the document - confirm that is intended.
+                finishChildSlot(abvs);
+            }
+            leaf++;
+        }
+    }
+
+    /** Copies the bytes recorded for leaf {@code leaf} from leavesABVS to {@code out}. */
+    private void writeLeafBytes(DataOutput out, int leaf) throws IOException {
+        final int from = leavesStart.getArray()[leaf];
+        final int length = leavesEnd.getArray()[leaf] - from;
+        out.write(leavesABVS.getByteArray(), from, length);
+    }
+
+    /**
+     * Writes {@code count} followed by {@code count} placeholder slots (-1) and registers
+     * each slot as pending. Does nothing when {@code count} is not positive.
+     */
+    private void reserveChildSlots(DataOutput out, ArrayBackedValueStorage abvs, int count)
+            throws IOException {
+        if (count <= 0) {
+            return;
+        }
+        out.writeInt(count);
+        final int slotsStart = abvs.getLength();
+        for (int slot = 0; slot < count; ++slot) {
+            out.writeInt(-1);
+            addChildSlot(slotsStart, slot, count);
+        }
+    }
+
+    /**
+     * Pushes one reserved slot onto the pending-slot stack.
+     * <p>
+     * Slot positions are registered back to front: the call with {@code count == 0} maps
+     * to the last slot and the call with {@code count == total - 1} maps to the first, so
+     * the most recently pushed entry refers to the first slot.
+     *
+     * @param offset position of the first slot of the group
+     * @param count  zero-based position of this call within the group
+     * @param total  number of slots in the group
+     */
+    private void addChildSlot(int offset, int count, int total) {
+        final int top = childSlotCounter;
+        final int dataStart = offset + total * SLOT_SIZE;
+        final int slotPosition = offset + (total - count - 1) * SLOT_SIZE;
+        childStartOffset.insert(top, dataStart);
+        childSlotOffset.insert(top, slotPosition);
+        childSlotCounter = top + 1;
+
+        // Trace of the whole pending-slot stack on stderr (looks like debugging output).
+        int i = 0;
+        while (i < childSlotCounter) {
+            System.err.println("\t\t" + i + " " + childStartOffset.getArray()[i] + " - " + childSlotOffset.getArray()[i]);
+            i++;
+        }
+    }
+
+    /**
+     * Pops the top pending slot and stores in it the number of bytes written to
+     * {@code abvs} since the data start recorded for that slot.
+     * <p>
+     * NOTE(review): there is no guard for an empty stack; with childSlotCounter at 0 the
+     * array lookups below would use index -1.
+     *
+     * @param abvs output storage whose byte array holds the slot to patch
+     */
+    private void finishChildSlot(ArrayBackedValueStorage abvs) {
+        childSlotCounter -= 1;
+        final int top = childSlotCounter;
+        final int writtenSoFar = abvs.getLength();
+        final int length = writtenSoFar - childStartOffset.getArray()[top];
+        final byte[] bytes = abvs.getByteArray();
+        IntegerPointable.setInteger(bytes, childSlotOffset.getArray()[top], length);
+    }
+
     private ElementNodeBuilder createENB() {
         if (freeENBList.isEmpty()) {
             return new ElementNodeBuilder();
@@ -344,4 +586,32 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
             peekENBStackTop().endChild(anb);
         }
     }
+
+    /**
+     * Opens a new leaf record of the given kind.
+     * <p>
+     * Appends the kind, the current length of leavesABVS, the current depth and zeroed
+     * attribute/child counters, then links the leaf to the entry stored in previousLeaf
+     * for the enclosing depth and bumps that parent's counters. Closing kinds are not
+     * counted, and neither is a leaf whose own index equals its parent index.
+     *
+     * @param kind one of the LEAF_* constants
+     */
+    private void leafNodeStart(int kind) {
+        final int startOffset = leavesABVS.getLength();
+        leavesKind.append(kind);
+        leavesStart.append(startOffset);
+        leavesDepth.append(textCurrentDepth);
+        leavesAttributeCount.append(0);
+        leavesChildrenCount.append(0);
+
+        final int parentLeaf = previousLeaf.getArray()[textCurrentDepth - 1];
+        leavesParent.append(parentLeaf);
+
+        final boolean closing = kind == LEAF_POST_NODE || kind == LEAF_POST_DOCUMENT;
+        if (!closing) {
+            if (kind == LEAF_ATTRIBUTE) {
+                leavesAttributeCount.getArray()[parentLeaf]++;
+            } else if (textCount != parentLeaf) {
+                leavesChildrenCount.getArray()[parentLeaf]++;
+            }
+        }
+
+        // Kinds that can have children become the parent for the next depth.
+        final boolean opensScope = kind == LEAF_PRE_NODE || kind == LEAF_PRE_DOCUMENT;
+        if (opensScope) {
+            previousLeaf.getArray()[textCurrentDepth] = textCount;
+        }
+    }
+
+    /**
+     * Closes the leaf opened by leafNodeStart: records the current length of leavesABVS
+     * as its end offset and advances the leaf counter.
+     */
+    private void leafNodeEnd() {
+        final int endOffset = leavesABVS.getLength();
+        leavesEnd.append(endOffset);
+        textCount += 1;
+    }
+
 }
\ No newline at end of file

Reply via email to