Repository: incubator-vxquery Updated Branches: refs/heads/prestonc/parser 8e304a701 -> 4abc5286a
Switched out the binary search in the dictionary builder for a TreeMap, which has O(log n) lookup and keeps its entries sorted by key. Project: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/commit/63f51a7a Tree: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/tree/63f51a7a Diff: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/diff/63f51a7a Branch: refs/heads/prestonc/parser Commit: 63f51a7a134451ec463d7896c0547aca7bad524f Parents: bc1ff5a Author: Preston Carman <[email protected]> Authored: Mon Feb 17 10:05:13 2014 -0800 Committer: Preston Carman <[email protected]> Committed: Mon Feb 17 10:05:13 2014 -0800 ---------------------------------------------------------------------- .../builders/nodes/DictionaryBuilder.java | 45 +++++++++++++------- 1 file changed, 29 insertions(+), 16 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/63f51a7a/vxquery-core/src/main/java/org/apache/vxquery/datamodel/builders/nodes/DictionaryBuilder.java ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/datamodel/builders/nodes/DictionaryBuilder.java b/vxquery-core/src/main/java/org/apache/vxquery/datamodel/builders/nodes/DictionaryBuilder.java index 20b7333..6a98b3d 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/datamodel/builders/nodes/DictionaryBuilder.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/datamodel/builders/nodes/DictionaryBuilder.java @@ -19,6 +19,10 @@ package org.apache.vxquery.datamodel.builders.nodes; import java.io.DataOutput; import java.io.DataOutputStream; import java.io.IOException; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.TreeMap; import org.apache.vxquery.util.GrowableIntArray; @@ -40,9 +44,7 @@ public class DictionaryBuilder { private final 
ByteArrayAccessibleOutputStream tempStringData; - private final DataOutput tempOut; - - private final UTF8StringPointable tempStringPointable; + private final TreeMap<String, Integer> hashSlotIndexes; private final IValueReferenceVector sortedStringsVector = new IValueReferenceVector() { @Override @@ -75,15 +77,15 @@ public class DictionaryBuilder { dataBuffer = new ByteArrayAccessibleOutputStream(); dataBufferOut = new DataOutputStream(dataBuffer); tempStringData = new ByteArrayAccessibleOutputStream(); - tempOut = new DataOutputStream(tempStringData); - tempStringPointable = (UTF8StringPointable) UTF8StringPointable.FACTORY.createPointable(); + hashSlotIndexes = new TreeMap<String, Integer>(); } - + public void reset() { stringEndOffsets.clear(); sortedSlotIndexes.clear(); dataBuffer.reset(); tempStringData.reset(); + hashSlotIndexes.clear(); } public void write(ArrayBackedValueStorage abvs) throws IOException { @@ -96,23 +98,34 @@ public class DictionaryBuilder { for (int i = 0; i < entryCount; ++i) { out.writeInt(entryOffsets[i]); } - int[] sortedOffsets = sortedSlotIndexes.getArray(); - for (int i = 0; i < entryCount; ++i) { - out.writeInt(sortedOffsets[i]); + if (hashSlotIndexes.isEmpty()) { + int[] sortedOffsets = sortedSlotIndexes.getArray(); + for (int i = 0; i < entryCount; ++i) { + out.writeInt(sortedOffsets[i]); + } + } else { + for (Entry<String, Integer> me : hashSlotIndexes.entrySet()) { + out.writeInt((Integer) me.getValue()); + } } out.write(dataBuffer.getByteArray(), 0, dataBuffer.size()); IntegerPointable.setInteger(abvs.getByteArray(), sizeOffset, abvs.getLength() - sizeOffset); } public int lookup(String str) { - tempStringData.reset(); - try { - tempOut.writeUTF(str); - } catch (IOException e) { - throw new IllegalStateException(e); + Integer slotIndex = hashSlotIndexes.get(str); + if (slotIndex == null) { + try { + dataBufferOut.writeUTF(str); + slotIndex = stringEndOffsets.getSize(); + dataBufferOut.writeInt(slotIndex); + } catch 
(IOException e) { + throw new IllegalStateException(e); + } + stringEndOffsets.append(dataBuffer.size()); + hashSlotIndexes.put(str, slotIndex); } - tempStringPointable.set(tempStringData.getByteArray(), 0, tempStringData.size()); - return lookup(tempStringPointable); + return slotIndex; } public int lookup(UTF8StringPointable str) {
