Working version with only the new code.
Project: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/commit/7336f47c Tree: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/tree/7336f47c Diff: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/diff/7336f47c Branch: refs/heads/prestonc/parser Commit: 7336f47ca5e7ef39e683fc4d56ca4c92e4b44091 Parents: df6772c Author: Preston Carman <[email protected]> Authored: Sat Feb 15 01:31:25 2014 -0800 Committer: Preston Carman <[email protected]> Committed: Thu Feb 27 14:22:24 2014 -0800 ---------------------------------------------------------------------- .../vxquery/xmlparser/SAXContentHandler.java | 307 ++++++------------- 1 file changed, 90 insertions(+), 217 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/7336f47c/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java index 0ef2991..3ead13a 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java @@ -16,11 +16,8 @@ package org.apache.vxquery.xmlparser; import java.io.DataOutput; import java.io.IOException; -import java.util.ArrayList; -import java.util.List; import org.apache.vxquery.datamodel.accessors.nodes.NodeTreePointable; -import org.apache.vxquery.datamodel.builders.nodes.AbstractNodeBuilder; import org.apache.vxquery.datamodel.builders.nodes.AttributeNodeBuilder; import org.apache.vxquery.datamodel.builders.nodes.CommentNodeBuilder; import org.apache.vxquery.datamodel.builders.nodes.DictionaryBuilder; @@ -38,12 +35,10 @@ import org.xml.sax.Locator; import org.xml.sax.SAXException; import org.xml.sax.ext.LexicalHandler; -import edu.uci.ics.hyracks.data.std.primitive.BytePointable; import edu.uci.ics.hyracks.data.std.primitive.IntegerPointable; import edu.uci.ics.hyracks.data.std.util.ArrayBackedValueStorage; public class SAXContentHandler implements ContentHandler, LexicalHandler { - private final ArrayBackedValueStorage docABVS; private final boolean createNodeIds; @@ -51,8 +46,6 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { private final ITreeNodeIdProvider nodeIdProvider; - private final ArrayBackedValueStorage tempABVS; - private final DocumentNodeBuilder docb; private final TextNodeBuilder tnb; @@ -67,22 +60,20 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { private final StringBuilder buffer; - private final List<ElementNodeBuilder> enbStack; - - private final List<ElementNodeBuilder> freeENBList; - private int nodeIdCounter; private boolean pendingText; private final ArrayBackedValueStorage leavesABVS; + private final ElementNodeBuilder enb; + // Structure and data. private final GrowableIntArray leavesKind; private final GrowableIntArray leavesStart; private final GrowableIntArray leavesEnd; - private final GrowableIntArray leavesDepth; - private final GrowableIntArray leavesParent; +// private final GrowableIntArray leavesDepth; +// private final GrowableIntArray leavesParent; private final GrowableIntArray leavesAttributeCount; private final GrowableIntArray leavesChildrenCount; @@ -105,11 +96,10 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { private final int LEAF_POST_NODE = 7; public SAXContentHandler(boolean attachTypes, ITreeNodeIdProvider nodeIdProvider) { - docABVS = new ArrayBackedValueStorage(); this.createNodeIds = nodeIdProvider != null; this.attachTypes = attachTypes; this.nodeIdProvider = nodeIdProvider; - this.tempABVS = new ArrayBackedValueStorage(); + enb = new ElementNodeBuilder(); docb = new DocumentNodeBuilder(); tnb = new TextNodeBuilder(); cnb = new CommentNodeBuilder(); @@ -117,23 +107,22 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { anb = new AttributeNodeBuilder(); db = new DictionaryBuilder(); buffer = new StringBuilder(); - enbStack = new ArrayList<ElementNodeBuilder>(); - freeENBList = new ArrayList<ElementNodeBuilder>(); pendingText = false; - leavesKind = new GrowableIntArray(); - leavesStart = new GrowableIntArray(); + leavesKind = new GrowableIntArray(600); + leavesStart = new GrowableIntArray(600); leavesABVS = new ArrayBackedValueStorage(); - leavesEnd = new GrowableIntArray(); - leavesDepth = new GrowableIntArray(); - leavesParent = new GrowableIntArray(); - leavesAttributeCount = new GrowableIntArray(); - leavesChildrenCount = new GrowableIntArray(); - previousLeaf = new GrowableIntArray(); - childStartOffset = new GrowableIntArray(); - childSlotOffset = new GrowableIntArray(); + leavesEnd = new GrowableIntArray(600); +// leavesDepth = new GrowableIntArray(600); +// leavesParent = new GrowableIntArray(600); + leavesAttributeCount = new GrowableIntArray(600); + leavesChildrenCount = new GrowableIntArray(600); + previousLeaf = new GrowableIntArray(600); + childStartOffset = new GrowableIntArray(600); + childSlotOffset = new GrowableIntArray(600); textCount = 0; textCurrentDepth = 0; + childSlotCounter = 0; } @@ -147,28 +136,25 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { public void endDocument() throws SAXException { try { flushText(); - docb.endChildrenChunk(); - docb.finish(); + leafNodeStart(LEAF_POST_DOCUMENT); + leafNodeEnd(); + textCurrentDepth--; } catch (IOException e) { e.printStackTrace(); throw new SAXException(e); } - leafNodeStart(LEAF_POST_DOCUMENT); - leafNodeEnd(); - - textCurrentDepth--; - int[] k = leavesKind.getArray(); - int[] s = leavesStart.getArray(); - int[] e = leavesEnd.getArray(); - int[] d = leavesDepth.getArray(); - int[] p = leavesParent.getArray(); - int[] c = leavesChildrenCount.getArray(); - int[] a = leavesAttributeCount.getArray(); - for (int i = 0; i < s.length; ++i) { - System.err.println(i + " " + k[i] + " - " + d[i] + ":" + s[i] + ":" + e[i] + " p=" + p[i] + " a=" + a[i] - + " c=" + c[i]); - } + // int[] k = leavesKind.getArray(); + // int[] s = leavesStart.getArray(); + // int[] e = leavesEnd.getArray(); + // int[] d = leavesDepth.getArray(); + // int[] p = leavesParent.getArray(); + // int[] c = leavesChildrenCount.getArray(); + // int[] a = leavesAttributeCount.getArray(); + // for (int i = 0; i < s.length; ++i) { + // System.err.println(i + " " + k[i] + " - " + d[i] + ":" + s[i] + ":" + e[i] + " p=" + p[i] + " a=" + a[i] + // + " c=" + c[i]); + // } } @@ -176,20 +162,13 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { public void endElement(String uri, String localName, String name) throws SAXException { try { flushText(); - ElementNodeBuilder enb = enbStack.remove(enbStack.size() - 1); - enb.endChildrenChunk(); - endChildInParent(enb); - freeENB(enb); + leafNodeStart(LEAF_POST_NODE); + leafNodeEnd(); + textCurrentDepth--; } catch (IOException e) { e.printStackTrace(); throw new SAXException(e); } - - leafNodeStart(LEAF_POST_NODE); - leafNodeEnd(); - - textCurrentDepth--; - } @Override @@ -204,28 +183,15 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { public void processingInstruction(String target, String data) throws SAXException { try { flushText(); - startChildInParent(pinb); - tempABVS.reset(); - tempABVS.getDataOutput().writeUTF(target); - if (createNodeIds) { - pinb.setLocalNodeId(nodeIdCounter++); - } - pinb.setTarget(tempABVS); - tempABVS.reset(); - tempABVS.getDataOutput().writeUTF(data); - pinb.setContent(tempABVS); - endChildInParent(pinb); - // Save to leavesABVS leafNodeStart(LEAF_PI); pinb.reset(leavesABVS); if (createNodeIds) { - pinb.setLocalNodeId(nodeIdCounter); + pinb.setLocalNodeId(nodeIdCounter++); } pinb.setTarget(target); pinb.setContent(data); leafNodeEnd(); - } catch (IOException e) { e.printStackTrace(); throw new SAXException(e); @@ -242,22 +208,30 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { @Override public void startDocument() throws SAXException { + leavesKind.clear(); + leavesStart.clear(); + leavesABVS.reset(); + leavesEnd.clear(); +// leavesDepth.clear(); +// leavesParent.clear(); + leavesAttributeCount.clear(); + leavesChildrenCount.clear(); + previousLeaf.clear(); + childStartOffset.clear(); + childSlotOffset.clear(); + textCount = 0; + textCurrentDepth = 0; + childSlotCounter = 0; + textCurrentDepth++; try { nodeIdCounter = 0; db.reset(); - docABVS.reset(); - docb.reset(docABVS); - if (createNodeIds) { - docb.setLocalNodeId(nodeIdCounter++); - } - docb.startChildrenChunk(); leafNodeStart(LEAF_PRE_DOCUMENT); - DocumentNodeBuilder docb2 = new DocumentNodeBuilder(); - docb2.reset(leavesABVS); + docb.reset(leavesABVS); if (createNodeIds) { - docb2.setLocalNodeId(nodeIdCounter); + docb.setLocalNodeId(nodeIdCounter++); } leafNodeEnd(); @@ -275,40 +249,26 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { flushText(); int idx = name.indexOf(':'); String prefix = idx < 0 ? "" : name.substring(0, idx); - ElementNodeBuilder enb = createENB(); - startChildInParent(enb); int uriCode = db.lookup(uri); int localNameCode = db.lookup(localName); int prefixCode = db.lookup(prefix); - enb.setName(uriCode, localNameCode, prefixCode); - if (attachTypes) { - int typeUriCode = db.lookup(XQueryConstants.XS_NSURI); - int typeLocalNameCode = db.lookup(BuiltinTypeQNames.UNTYPED_STR); - int typePrefixCode = db.lookup(XQueryConstants.XS_PREFIX); - enb.setType(typeUriCode, typeLocalNameCode, typePrefixCode); - } - if (createNodeIds) { - enb.setLocalNodeId(nodeIdCounter++); - } // Save to leavesABVS leafNodeStart(LEAF_PRE_NODE); - ElementNodeBuilder enb2 = createENB(); - enb2.setMvs(leavesABVS); - enb2.setName(uriCode, localNameCode, prefixCode); + enb.setMvs(leavesABVS); + enb.setName(uriCode, localNameCode, prefixCode); if (attachTypes) { int typeUriCode = db.lookup(XQueryConstants.XS_NSURI); int typeLocalNameCode = db.lookup(BuiltinTypeQNames.UNTYPED_STR); int typePrefixCode = db.lookup(XQueryConstants.XS_PREFIX); - enb2.setType(typeUriCode, typeLocalNameCode, typePrefixCode); + enb.setType(typeUriCode, typeLocalNameCode, typePrefixCode); } if (createNodeIds) { - enb2.setLocalNodeId(nodeIdCounter); + enb.setLocalNodeId(nodeIdCounter++); } leafNodeEnd(); textCurrentDepth++; - enb.startAttributeChunk(); final int nAttrs = atts.getLength(); for (int i = 0; i < nAttrs; ++i) { String aName = atts.getQName(i); @@ -316,24 +276,6 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { int aPrefixCode = db.lookup(aIdx < 0 ? "" : aName.substring(0, aIdx)); int aLocalNameCode = db.lookup(atts.getLocalName(i)); int aUriCode = db.lookup(atts.getURI(i)); - String aValue = atts.getValue(i); - tempABVS.reset(); - DataOutput tempOut = tempABVS.getDataOutput(); - tempOut.write(ValueTag.XS_UNTYPED_ATOMIC_TAG); - tempOut.writeUTF(aValue); - enb.startAttribute(anb); - anb.setName(aUriCode, aLocalNameCode, aPrefixCode); - if (attachTypes) { - int typeUriCode = db.lookup(XQueryConstants.XS_NSURI); - int typeLocalNameCode = db.lookup(BuiltinTypeQNames.UNTYPED_ATOMIC_STR); - int typePrefixCode = db.lookup(XQueryConstants.XS_PREFIX); - anb.setType(typeUriCode, typeLocalNameCode, typePrefixCode); - } - if (createNodeIds) { - anb.setLocalNodeId(nodeIdCounter++); - } - anb.setValue(tempABVS); - enb.endAttribute(anb); // Save to leavesABVS leafNodeStart(LEAF_ATTRIBUTE); @@ -350,11 +292,7 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { } anb.setValue(atts.getValue(i)); leafNodeEnd(); - } - enb.endAttributeChunk(); - enb.startChildrenChunk(); - enbStack.add(enb); } catch (IOException e) { e.printStackTrace(); throw new SAXException(e); @@ -369,21 +307,13 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { public void comment(char[] ch, int start, int length) throws SAXException { try { flushText(); - startChildInParent(cnb); buffer.append(ch, start, length); - tempABVS.reset(); - tempABVS.getDataOutput().writeUTF(buffer.toString()); - if (createNodeIds) { - cnb.setLocalNodeId(nodeIdCounter++); - } - cnb.setValue(tempABVS); - endChildInParent(cnb); // Save to leavesABVS leafNodeStart(LEAF_COMMENT); cnb.reset(leavesABVS); if (createNodeIds) { - cnb.setLocalNodeId(nodeIdCounter); + cnb.setLocalNodeId(nodeIdCounter++); } cnb.setValue(buffer.toString()); leafNodeEnd(); @@ -397,20 +327,11 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { private void flushText() throws IOException { if (pendingText) { - peekENBStackTop().startChild(tnb); - tempABVS.reset(); - tempABVS.getDataOutput().writeUTF(buffer.toString()); - if (createNodeIds) { - tnb.setLocalNodeId(nodeIdCounter++); - } - tnb.setValue(tempABVS); - peekENBStackTop().endChild(tnb); - // Save to leavesABVS leafNodeStart(LEAF_TEXT); tnb.reset(leavesABVS); if (createNodeIds) { - tnb.setLocalNodeId(nodeIdCounter); + tnb.setLocalNodeId(nodeIdCounter++); } tnb.setValue(buffer.toString()); leafNodeEnd(); @@ -459,24 +380,6 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { out.writeInt(nodeIdProvider.getId()); } db.write(abvs); - out.write(docABVS.getByteArray(), docABVS.getStartOffset(), docABVS.getLength()); - } - - public void writeOnce(ArrayBackedValueStorage abvs) throws IOException { - DataOutput out = abvs.getDataOutput(); - out.write(ValueTag.NODE_TREE_TAG); - byte header = NodeTreePointable.HEADER_DICTIONARY_EXISTS_MASK; - if (attachTypes) { - header |= NodeTreePointable.HEADER_TYPE_EXISTS_MASK; - } - if (createNodeIds) { - header |= NodeTreePointable.HEADER_NODEID_EXISTS_MASK; - } - out.write(header); - if (createNodeIds) { - out.writeInt(nodeIdProvider.getId()); - } - db.write(abvs); for (int i = 0; i < leavesKind.getSize(); ++i) { if (leavesKind.getArray()[i] == LEAF_PRE_DOCUMENT) { @@ -485,12 +388,7 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { int children = leavesChildrenCount.getArray()[i]; if (children > 0) { - out.writeInt(children); - int offset = abvs.getLength(); - for (int s = 0; s < children; ++s) { - out.writeInt(-1); - addChildSlot(offset, s, children); - } + sequenceSlotStub(abvs, children); } // Continue with nodes. @@ -499,103 +397,78 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { int attrCount = leavesAttributeCount.getArray()[i]; int childrenCount = leavesChildrenCount.getArray()[i]; - ElementNodeBuilder enb2 = createENB(); - enb2.setMvs(abvs); - enb2.setTagHeader(nsCount, attrCount, childrenCount); + enb.setMvs(abvs); + enb.setTagHeader(nsCount, attrCount, childrenCount); out.write(leavesABVS.getByteArray(), leavesStart.getArray()[i], leavesEnd.getArray()[i] - leavesStart.getArray()[i]); if (attrCount > 0) { - out.writeInt(attrCount); - int offset = abvs.getLength(); - for (int s = 0; s < attrCount; ++s) { - out.writeInt(-1); - addChildSlot(offset, s, attrCount); - } + sequenceSlotStub(abvs, attrCount); for (int s = 0; s < attrCount; ++s) { ++i; out.write(leavesABVS.getByteArray(), leavesStart.getArray()[i], leavesEnd.getArray()[i] - leavesStart.getArray()[i]); + updateSequenceSlot(abvs); } } if (childrenCount > 0) { - out.writeInt(childrenCount); - int offset = abvs.getLength(); - for (int s = 0; s < childrenCount; ++s) { - out.writeInt(-1); - addChildSlot(offset, s, childrenCount); - } + sequenceSlotStub(abvs, childrenCount); } // Continue with nodes. + } else if (leavesKind.getArray()[i] == LEAF_POST_DOCUMENT) { + // no action + } else if (leavesKind.getArray()[i] == LEAF_POST_NODE) { + updateSequenceSlot(abvs); } else { - if (leavesKind.getArray()[i] != LEAF_POST_DOCUMENT && leavesKind.getArray()[i] != LEAF_POST_NODE) out.write(leavesABVS.getByteArray(), leavesStart.getArray()[i], leavesEnd.getArray()[i] - leavesStart.getArray()[i]); - finishChildSlot(abvs); + updateSequenceSlot(abvs); } } } - private void addChildSlot(int offset, int count, int total) { + private void sequenceSlotStub(ArrayBackedValueStorage abvs, int count) throws IOException { + DataOutput out = abvs.getDataOutput(); + out.writeInt(count); + int offset = abvs.getLength(); + for (int s = 0; s < count; ++s) { + out.writeInt(-1); + addSequenceSlot(offset, s, count); + } + // + // for (int i = 0; i < childSlotCounter; ++i) { + // System.err.println("\t\t" + i + " " + childStartOffset.getArray()[i] + " - " + // + childSlotOffset.getArray()[i]); + // } + } + + private void addSequenceSlot(int offset, int count, int total) { childStartOffset.insert(childSlotCounter, offset + total * SLOT_SIZE); childSlotOffset.insert(childSlotCounter, offset + (total - count - 1) * SLOT_SIZE); childSlotCounter++; - - for (int i = 0; i < childSlotCounter; ++i) { - System.err.println("\t\t" + i + " " + childStartOffset.getArray()[i] + " - " - + childSlotOffset.getArray()[i]); - } } - private void finishChildSlot(ArrayBackedValueStorage abvs) { + private void updateSequenceSlot(ArrayBackedValueStorage abvs) { + // for (int i = 0; i < childSlotCounter; ++i) { + // System.err.println("\t" + i + " " + childStartOffset.getArray()[i] + " - " + childSlotOffset.getArray()[i]); + // } childSlotCounter--; int length = abvs.getLength() - childStartOffset.getArray()[childSlotCounter]; IntegerPointable.setInteger(abvs.getByteArray(), childSlotOffset.getArray()[childSlotCounter], length); } - private ElementNodeBuilder createENB() { - if (freeENBList.isEmpty()) { - return new ElementNodeBuilder(); - } - return freeENBList.remove(freeENBList.size() - 1); - } - - private void freeENB(ElementNodeBuilder enb) { - freeENBList.add(enb); - } - - private ElementNodeBuilder peekENBStackTop() { - return enbStack.get(enbStack.size() - 1); - } - - private void startChildInParent(AbstractNodeBuilder anb) throws IOException { - if (enbStack.isEmpty()) { - docb.startChild(anb); - } else { - peekENBStackTop().startChild(anb); - } - } - - private void endChildInParent(AbstractNodeBuilder anb) throws IOException { - if (enbStack.isEmpty()) { - docb.endChild(anb); - } else { - peekENBStackTop().endChild(anb); - } - } - private void leafNodeStart(int kind) { leavesKind.append(kind); leavesStart.append(leavesABVS.getLength()); - leavesDepth.append(textCurrentDepth); +// leavesDepth.append(textCurrentDepth); leavesAttributeCount.append(0); leavesChildrenCount.append(0); int parent = previousLeaf.getArray()[textCurrentDepth - 1]; - leavesParent.append(parent); +// leavesParent.append(parent); if (kind == LEAF_POST_NODE || kind == LEAF_POST_DOCUMENT) { // Skip Count } else if (kind == LEAF_ATTRIBUTE) {
