Repository: incubator-vxquery
Updated Branches:
  refs/heads/prestonc/benchmark cf09bd9ae -> 8b86884a1


More clean up and fixes for new SAXContentHandler features.

- Made doc and doc-available work with the old SAXContentHandler method. 
(Basically, the element writer does not affect the previous version.)
- Better variable and function naming.
- Attempt to make creating XMLParser and SAXContentHandler cleaner.


Project: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/commit/8b86884a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/tree/8b86884a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/diff/8b86884a

Branch: refs/heads/prestonc/benchmark
Commit: 8b86884a11ccf08e6b315fefdfb62184083f3f0d
Parents: cf09bd9
Author: Preston Carman <[email protected]>
Authored: Tue May 20 18:42:34 2014 -0700
Committer: Preston Carman <[email protected]>
Committed: Tue May 20 18:42:34 2014 -0700

----------------------------------------------------------------------
 .../VXQueryCollectionOperatorDescriptor.java    | 10 +-
 .../FnDocAvailableScalarEvaluatorFactory.java   | 10 +-
 .../runtime/functions/util/FunctionHelper.java  |  4 +-
 .../vxquery/xmlparser/SAXContentHandler.java    | 96 +++++++++++---------
 .../org/apache/vxquery/xmlparser/XMLParser.java | 25 ++++-
 5 files changed, 88 insertions(+), 57 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/8b86884a/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java
----------------------------------------------------------------------
diff --git 
a/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java
 
b/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java
index 1f4bb2f..a9c7255 100644
--- 
a/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java
+++ 
b/vxquery-core/src/main/java/org/apache/vxquery/metadata/VXQueryCollectionOperatorDescriptor.java
@@ -71,20 +71,16 @@ public class VXQueryCollectionOperatorDescriptor extends 
AbstractSingleActivityO
         final ITreeNodeIdProvider nodeIdProvider = new 
TreeNodeIdProvider(partitionId, dataSourceId, totalDataSources);
         final String nodeId = 
ctx.getJobletContext().getApplicationContext().getNodeId();
         final DynamicContext dCtx = (DynamicContext) 
ctx.getJobletContext().getGlobalJobData();
-        final List<SequenceType> childSequenceTypes = new 
ArrayList<SequenceType>();
 
         final String collectionName = collectionPartitions[partition % 
collectionPartitions.length];
-        final XMLParser parser = new XMLParser(false, nodeIdProvider);;
+        final XMLParser parser = new XMLParser(false, nodeIdProvider, frame, 
appender, childSeq,
+                dCtx.getStaticContext());
 
         return new AbstractUnaryInputUnaryOutputOperatorNodePushable() {
             @Override
             public void open() throws HyracksDataException {
                 appender.reset(frame, true);
                 writer.open();
-
-                for (int typeCode : childSeq) {
-                    
childSequenceTypes.add(dCtx.getStaticContext().lookupSequenceType(typeCode));
-                }
             }
 
             @Override
@@ -100,7 +96,7 @@ public class VXQueryCollectionOperatorDescriptor extends 
AbstractSingleActivityO
                         Iterator<File> it = 
FileUtils.iterateFiles(collectionDirectory, new VXQueryIOFileFilter(),
                                 TrueFileFilter.INSTANCE);
                         while (it.hasNext()) {
-                            parser.parseOutElements(it.next(), frame, 
appender, writer, fta, t, childSequenceTypes);
+                            parser.parseOutElements(it.next(), writer, fta, t);
                         }
                     }
                 } else {

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/8b86884a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/node/FnDocAvailableScalarEvaluatorFactory.java
----------------------------------------------------------------------
diff --git 
a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/node/FnDocAvailableScalarEvaluatorFactory.java
 
b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/node/FnDocAvailableScalarEvaluatorFactory.java
index 499119c..ad4b1f0 100644
--- 
a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/node/FnDocAvailableScalarEvaluatorFactory.java
+++ 
b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/node/FnDocAvailableScalarEvaluatorFactory.java
@@ -27,6 +27,9 @@ import org.apache.vxquery.exceptions.SystemException;
 import 
org.apache.vxquery.runtime.functions.base.AbstractTaggedValueArgumentScalarEvaluator;
 import 
org.apache.vxquery.runtime.functions.base.AbstractTaggedValueArgumentScalarEvaluatorFactory;
 import org.apache.vxquery.runtime.functions.util.FunctionHelper;
+import org.apache.vxquery.xmlparser.ITreeNodeIdProvider;
+import org.apache.vxquery.xmlparser.TreeNodeIdProvider;
+import org.apache.vxquery.xmlparser.XMLParser;
 
 import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
 import edu.uci.ics.hyracks.algebricks.runtime.base.IScalarEvaluator;
@@ -52,6 +55,8 @@ public class FnDocAvailableScalarEvaluatorFactory extends 
AbstractTaggedValueArg
         final SequencePointable seqp = (SequencePointable) 
SequencePointable.FACTORY.createPointable();
         final ByteBufferInputStream bbis = new ByteBufferInputStream();
         final DataInputStream di = new DataInputStream(bbis);
+        final int partition = 
ctx.getTaskAttemptId().getTaskId().getPartition();
+        final ITreeNodeIdProvider nodeIdProvider = new 
TreeNodeIdProvider((short) partition);
 
         return new AbstractTaggedValueArgumentScalarEvaluator(args) {
             @Override
@@ -69,9 +74,10 @@ public class FnDocAvailableScalarEvaluatorFactory extends 
AbstractTaggedValueArg
                 if (tvp.getTag() != ValueTag.XS_STRING_TAG) {
                     throw new SystemException(ErrorCode.FORG0006);
                 }
-
+                tvp.getValue(stringp);
                 try {
-                    FunctionHelper.readInDocFromPointable(stringp, bbis, di, 
abvs, null);
+                    XMLParser parser = new XMLParser(false, nodeIdProvider);
+                    FunctionHelper.readInDocFromPointable(stringp, bbis, di, 
abvs, parser);
                     XDMConstants.setTrue(result);
                 } catch (Exception e) {
                     XDMConstants.setFalse(result);

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/8b86884a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/util/FunctionHelper.java
----------------------------------------------------------------------
diff --git 
a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/util/FunctionHelper.java
 
b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/util/FunctionHelper.java
index 4953c12..8074eab 100644
--- 
a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/util/FunctionHelper.java
+++ 
b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/util/FunctionHelper.java
@@ -1232,8 +1232,8 @@ public class FunctionHelper {
         System.err.println(" printUTF8String END");
     }
 
-    public static void readInDocFromPointable(UTF8StringPointable stringp, 
ByteBufferInputStream bbis, DataInputStream di,
-            ArrayBackedValueStorage abvs, XMLParser parser) throws 
HyracksDataException {
+    public static void readInDocFromPointable(UTF8StringPointable stringp, 
ByteBufferInputStream bbis,
+            DataInputStream di, ArrayBackedValueStorage abvs, XMLParser 
parser) throws HyracksDataException {
         String fName;
         try {
             fName = getStringFromPointable(stringp, bbis, di);

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/8b86884a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
----------------------------------------------------------------------
diff --git 
a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
 
b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
index 58a4f03..bd1a354 100644
--- 
a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
+++ 
b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
@@ -96,7 +96,7 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
 
     private final ArrayBackedValueStorage resultABVS;
 
-    private boolean writeMode;
+    private boolean skipping;
 
     private boolean[] subElement = null;
 
@@ -129,12 +129,20 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
         freeENBList = new ArrayList<ElementNodeBuilder>();
         pendingText = false;
         tvp = (TaggedValuePointable) 
TaggedValuePointable.FACTORY.createPointable();
-        writeMode = false;
+        skipping = true;
+    }
+
+    public SAXContentHandler(boolean attachTypes, ITreeNodeIdProvider 
nodeIdProvider, ByteBuffer frame,
+            FrameTupleAppender appender, List<SequenceType> 
childSequenceTypes) {
+        this(attachTypes, nodeIdProvider);
+        this.frame = frame;
+        this.appender = appender;
+        setChildPathSteps(childSequenceTypes);
     }
 
     @Override
     public void characters(char[] ch, int start, int length) throws 
SAXException {
-        if (writeMode) {
+        if (!skipping) {
             buffer.append(ch, start, length);
             pendingText = true;
         }
@@ -142,12 +150,14 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
 
     @Override
     public void endDocument() throws SAXException {
-        if (writeMode) {
+        if (!skipping) {
             try {
                 flushText();
                 docb.endChildrenChunk();
                 docb.finish();
-                writeElement();
+                if (frame != null && appender != null) {
+                    writeElement();
+                }
             } catch (IOException e) {
                 e.printStackTrace();
                 throw new SAXException(e);
@@ -155,22 +165,25 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
         }
     }
 
+    private void endElementChildPathStep() throws IOException {
+        if (foundFirstNonSkippedElement()) {
+            writeElement();
+        }
+        if (subElement != null && depth <= subElement.length) {
+            subElement[depth - 1] = false;
+        }
+    }
+
     @Override
     public void endElement(String uri, String localName, String name) throws 
SAXException {
-        if (writeMode) {
+        if (!skipping) {
             try {
                 flushText();
                 ElementNodeBuilder enb = enbStack.remove(enbStack.size() - 1);
                 enb.endChildrenChunk();
                 endChildInParent(enb);
-
-                if (foundChildPathStep()) {
-                    writeElement();
-                }
-                if (subElement != null && depth <= subElement.length) {
-                    subElement[depth - 1] = false;
-                }
                 freeENB(enb);
+                endElementChildPathStep();
             } catch (IOException e) {
                 e.printStackTrace();
                 throw new SAXException(e);
@@ -189,7 +202,7 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
 
     @Override
     public void processingInstruction(String target, String data) throws 
SAXException {
-        if (writeMode) {
+        if (!skipping) {
             try {
                 flushText();
                 startChildInParent(pinb);
@@ -221,43 +234,47 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
     @Override
     public void startDocument() throws SAXException {
         if (subElement == null) {
-            writeMode = true;
+            skipping = false;
         }
-        try {
-            db.reset();
-            docABVS.reset();
-            if (writeMode) {
+        db.reset();
+        docABVS.reset();
+        if (!skipping) {
+            try {
                 docb.reset(docABVS);
                 if (createNodeIds) {
                     docb.setLocalNodeId(nodeIdCounter++);
                 }
                 docb.startChildrenChunk();
                 flushText();
+            } catch (IOException e) {
+                e.printStackTrace();
+                throw new SAXException(e);
             }
-        } catch (IOException e) {
-            e.printStackTrace();
-            throw new SAXException(e);
         }
     }
 
-    @Override
-    public void startElement(String uri, String localName, String name, 
Attributes atts) throws SAXException {
-        depth++;
-        // Check path step if it exists.
+    private boolean startElementChildPathStep(String uri, String localName) {
         if (subElement != null && depth <= subElement.length) {
+            // Check path step if it exists.
             if (uri.compareTo(childUri[depth - 1]) == 0) {
                 if (localName.compareTo(childLocalName[depth - 1]) == 0) {
                     subElement[depth - 1] = true;
                 }
             }
         }
-
-        boolean start = foundChildPathStep();
+        boolean start = foundFirstNonSkippedElement();
         if (start) {
-            writeMode = true;
+            skipping = false;
         }
+        return start;
+    }
 
-        if (writeMode) {
+    @Override
+    public void startElement(String uri, String localName, String name, 
Attributes atts) throws SAXException {
+        depth++;
+        boolean start = startElementChildPathStep(uri, localName);
+
+        if (!skipping) {
             try {
                 flushText();
                 int idx = name.indexOf(':');
@@ -320,7 +337,7 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
 
     @Override
     public void comment(char[] ch, int start, int length) throws SAXException {
-        if (writeMode) {
+        if (!skipping) {
             try {
                 flushText();
                 startChildInParent(cnb);
@@ -379,7 +396,7 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
     public void startEntity(String name) throws SAXException {
     }
 
-    public void setChildPathSteps(List<SequenceType> childSeq) {
+    private void setChildPathSteps(List<SequenceType> childSeq) {
         //        this.childSeq = childSeq;
         if (!childSeq.isEmpty()) {
             subElement = new boolean[childSeq.size()];
@@ -398,10 +415,7 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
         }
     }
 
-    public void setupElementWriter(ByteBuffer frame, FrameTupleAppender 
appender, IFrameWriter writer,
-            FrameTupleAccessor fta, int t) throws IOException {
-        this.frame = frame;
-        this.appender = appender;
+    public void setupElementWriter(IFrameWriter writer, FrameTupleAccessor 
fta, int t) {
         this.writer = writer;
         this.fta = fta;
         this.t = t;
@@ -430,7 +444,7 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
         }
         tvp.set(resultABVS.getByteArray(), resultABVS.getStartOffset(), 
resultABVS.getLength());
         addNodeToTuple(tvp, t);
-        writeMode = false;
+        skipping = true;
     }
 
     public void writeDocument(ArrayBackedValueStorage abvs) throws IOException 
{
@@ -470,8 +484,8 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
         startChildInParent(anb, false);
     }
 
-    private void startChildInParent(AbstractNodeBuilder anb, boolean track) 
throws IOException {
-        if (track) {
+    private void startChildInParent(AbstractNodeBuilder anb, boolean 
startNewElement) throws IOException {
+        if (startNewElement) {
             elementABVS.reset();
             anb.reset(elementABVS);
         } else if (enbStack.isEmpty()) {
@@ -521,8 +535,8 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
     /**
      * Determines if the correct path step is active.
      */
-    private boolean foundChildPathStep() {
-        if (subElement.length != depth) {
+    private boolean foundFirstNonSkippedElement() {
+        if (subElement == null || subElement.length != depth) {
             // Not the correct depth.
             return false;
         }

http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/8b86884a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/XMLParser.java
----------------------------------------------------------------------
diff --git 
a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/XMLParser.java 
b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/XMLParser.java
index edef1a1..c1fd6f0 100644
--- a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/XMLParser.java
+++ b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/XMLParser.java
@@ -20,9 +20,11 @@ import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.nio.ByteBuffer;
+import java.util.ArrayList;
 import java.util.List;
 import java.util.zip.GZIPInputStream;
 
+import org.apache.vxquery.context.StaticContext;
 import org.apache.vxquery.exceptions.VXQueryFileNotFoundException;
 import org.apache.vxquery.exceptions.VXQueryParseException;
 import org.apache.vxquery.types.SequenceType;
@@ -43,9 +45,23 @@ public class XMLParser {
     final InputSource in;
 
     public XMLParser(boolean attachTypes, ITreeNodeIdProvider idProvider) 
throws HyracksDataException {
+        this(attachTypes, idProvider, null, null, null, null);
+    }
+
+    public XMLParser(boolean attachTypes, ITreeNodeIdProvider idProvider, 
ByteBuffer frame,
+            FrameTupleAppender appender, List<Integer> childSeq, StaticContext 
staticContext)
+            throws HyracksDataException {
         try {
             parser = XMLReaderFactory.createXMLReader();
-            handler = new SAXContentHandler(attachTypes, idProvider);
+            if (frame == null || appender == null) {
+                handler = new SAXContentHandler(attachTypes, idProvider);
+            } else {
+                List<SequenceType> childSequenceTypes = new 
ArrayList<SequenceType>();
+                for (int typeCode : childSeq) {
+                    
childSequenceTypes.add(staticContext.lookupSequenceType(typeCode));
+                }
+                handler = new SAXContentHandler(attachTypes, idProvider, 
frame, appender, childSequenceTypes);
+            }
             parser.setContentHandler(handler);
             
parser.setProperty("http://xml.org/sax/properties/lexical-handler", handler);
             in = new InputSource();
@@ -72,16 +88,15 @@ public class XMLParser {
         }
     }
 
-    public void parseOutElements(File file, ByteBuffer frame, 
FrameTupleAppender appender, IFrameWriter writer,
-            FrameTupleAccessor fta, int t, List<SequenceType> childSeq) throws 
HyracksDataException {
+    public void parseOutElements(File file, IFrameWriter writer, 
FrameTupleAccessor fta, int t)
+            throws HyracksDataException {
         try {
             if (file.getName().toLowerCase().endsWith(".xml.gz")) {
                 in.setCharacterStream(new InputStreamReader(new 
GZIPInputStream(new FileInputStream(file))));
             } else {
                 in.setCharacterStream(new InputStreamReader(new 
FileInputStream(file)));
             }
-            handler.setChildPathSteps(childSeq);
-            handler.setupElementWriter(frame, appender, writer, fta, t);
+            handler.setupElementWriter(writer, fta, t);
             parser.parse(in);
         } catch (FileNotFoundException e) {
             throw new VXQueryFileNotFoundException(e, file);

Reply via email to