Repository: cassandra Updated Branches: refs/heads/cassandra-2.1 2777e1e5d -> 5f82aa3b0 refs/heads/trunk dc1dad328 -> 434e04281
optimize AtomicBTree patch by Benedict Elliott Smith; reviewed by jbellis for CASSANDRA-6692 Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/5f82aa3b Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/5f82aa3b Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/5f82aa3b Branch: refs/heads/cassandra-2.1 Commit: 5f82aa3b03031c9aa7439b4cb745a6c5641a7b76 Parents: 2777e1e Author: Jonathan Ellis <jbel...@apache.org> Authored: Mon Feb 17 20:40:42 2014 -0600 Committer: Jonathan Ellis <jbel...@apache.org> Committed: Mon Feb 17 20:40:55 2014 -0600 ---------------------------------------------------------------------- CHANGES.txt | 2 +- .../org/apache/cassandra/utils/btree/BTree.java | 59 +------------- .../apache/cassandra/utils/btree/BTreeSet.java | 2 +- .../apache/cassandra/utils/btree/Builder.java | 6 +- .../cassandra/utils/btree/NodeBuilder.java | 85 +++++++++++++------- .../cassandra/utils/btree/UpdateFunction.java | 29 +++++++ .../apache/cassandra/utils/LongBTreeTest.java | 17 +++- 7 files changed, 106 insertions(+), 94 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/5f82aa3b/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index d7bc77e..ea74c62 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -2,7 +2,7 @@ * Add flush directory distinct from compaction directories (CASSANDRA-6357) * Require JNA by default (CASSANDRA-6575) * add listsnapshots command to nodetool (CASSANDRA-5742) - * Introduce AtomicBTreeColumns (CASSANDRA-6271) + * Introduce AtomicBTreeColumns (CASSANDRA-6271, 6692) * Multithreaded commitlog (CASSANDRA-3578) * allocate fixed index summary memory pool and resample cold index summaries to use less memory (CASSANDRA-5519) http://git-wip-us.apache.org/repos/asf/cassandra/blob/5f82aa3b/src/java/org/apache/cassandra/utils/btree/BTree.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/utils/btree/BTree.java b/src/java/org/apache/cassandra/utils/btree/BTree.java index 5368690..82f5574 100644 --- a/src/java/org/apache/cassandra/utils/btree/BTree.java +++ b/src/java/org/apache/cassandra/utils/btree/BTree.java @@ -128,7 +128,7 @@ public class BTree */ public static <V> Object[] update(Object[] btree, Comparator<V> comparator, Collection<V> updateWith, boolean updateWithIsSorted) { - return update(btree, comparator, updateWith, updateWithIsSorted, null); + return update(btree, comparator, updateWith, updateWithIsSorted, UpdateFunction.NoOp.<V>instance()); } /** @@ -154,63 +154,6 @@ public class BTree if (!updateWithIsSorted) updateWith = sorted(updateWith, comparator, updateWith.size()); - // if the b-tree is just a single root node, we can try a quick in-place merge - if (isLeaf(btree) && btree.length + updateWith.size() < QUICK_MERGE_LIMIT) - { - // since updateWith is sorted, we can skip elements from earlier iterations tracked by this offset - int btreeOffset = 0; - int keyEnd = getLeafKeyEnd(btree); - Object[] merged = new Object[QUICK_MERGE_LIMIT]; - int mergedCount = 0; - for (V v : updateWith) - { - // find the index i where v would belong in the original btree - int i = find(comparator, v, btree, btreeOffset, keyEnd); - boolean found = i >= 0; - if (!found) - i = -i - 1; - - // copy original elements up to i into the merged array - int count = i - btreeOffset; - if (count > 0) - { - System.arraycopy(btree, btreeOffset, merged, mergedCount, count); - mergedCount += count; - btreeOffset = i; - } - - if (found) - { - // apply replaceF if it matches an existing element - btreeOffset++; - if (updateF != null) - v = updateF.apply((V) btree[i], v); - } - else if (updateF != null) - { - // new element but still need to apply replaceF to handle indexing and size-tracking - v = updateF.apply(v); - } - - merged[mergedCount++] = v; - } - - // copy any remaining original elements - if (btreeOffset < keyEnd) - { - int count = keyEnd - btreeOffset; - System.arraycopy(btree, btreeOffset, merged, mergedCount, count); - mergedCount += count; - } - - assert mergedCount <= FAN_FACTOR; - - Object[] r = Arrays.copyOfRange(merged, 0, mergedCount + (mergedCount & 1)); - if (updateF != null) - updateF.allocated(ObjectSizes.sizeOfArray(r) - (btree.length == 0 ? 0 : ObjectSizes.sizeOfArray(btree))); - return r; - } - return modifier.get().update(btree, comparator, updateWith, updateF); } http://git-wip-us.apache.org/repos/asf/cassandra/blob/5f82aa3b/src/java/org/apache/cassandra/utils/btree/BTreeSet.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/utils/btree/BTreeSet.java b/src/java/org/apache/cassandra/utils/btree/BTreeSet.java index 6144741..e170556 100644 --- a/src/java/org/apache/cassandra/utils/btree/BTreeSet.java +++ b/src/java/org/apache/cassandra/utils/btree/BTreeSet.java @@ -38,7 +38,7 @@ public class BTreeSet<V> implements NavigableSet<V> public BTreeSet<V> update(Collection<V> updateWith, boolean isSorted) { - return new BTreeSet<>(BTree.update(tree, comparator, updateWith, isSorted, null), comparator); + return new BTreeSet<>(BTree.update(tree, comparator, updateWith, isSorted, UpdateFunction.NoOp.<V>instance()), comparator); } @Override http://git-wip-us.apache.org/repos/asf/cassandra/blob/5f82aa3b/src/java/org/apache/cassandra/utils/btree/Builder.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/utils/btree/Builder.java b/src/java/org/apache/cassandra/utils/btree/Builder.java index 61612f9..03a7941 100644 --- a/src/java/org/apache/cassandra/utils/btree/Builder.java +++ b/src/java/org/apache/cassandra/utils/btree/Builder.java @@ -57,6 +57,8 @@ final class Builder */ public <V> Object[] update(Object[] btree, Comparator<V> comparator, Collection<V> source, UpdateFunction<V> updateF) { + assert updateF != null; + NodeBuilder current = rootBuilder; current.reset(btree, POSITIVE_INFINITY, updateF, comparator); @@ -64,7 +66,7 @@ final class Builder { while (true) { - if (updateF != null && updateF.abortEarly()) + if (updateF.abortEarly()) { rootBuilder.clear(); return null; @@ -103,7 +105,7 @@ final class Builder while ((size >>= FAN_SHIFT) > 0) current = current.ensureChild(); - current.reset(EMPTY_LEAF, POSITIVE_INFINITY, null, null); + current.reset(EMPTY_LEAF, POSITIVE_INFINITY, UpdateFunction.NoOp.instance(), null); for (V key : source) current.addNewKey(key); http://git-wip-us.apache.org/repos/asf/cassandra/blob/5f82aa3b/src/java/org/apache/cassandra/utils/btree/NodeBuilder.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/utils/btree/NodeBuilder.java b/src/java/org/apache/cassandra/utils/btree/NodeBuilder.java index fa421da..759ffaa 100644 --- a/src/java/org/apache/cassandra/utils/btree/NodeBuilder.java +++ b/src/java/org/apache/cassandra/utils/btree/NodeBuilder.java @@ -49,7 +49,6 @@ final class NodeBuilder // we null out the contents of buildKeys/buildChildren when clear()ing them for re-use; this is where // we track how much we actually have to null out private int maxBuildKeyPosition; - private int maxBuildChildPosition; // current node of the btree we're modifying/copying from private Object[] copyFrom; @@ -76,8 +75,8 @@ final class NodeBuilder { current.reset(null, null, null, null); Arrays.fill(current.buildKeys, 0, current.maxBuildKeyPosition, null); - Arrays.fill(current.buildChildren, 0, current.maxBuildChildPosition, null); - current.maxBuildChildPosition = current.maxBuildKeyPosition = 0; + Arrays.fill(current.buildChildren, 0, current.maxBuildKeyPosition + 1, null); + current.maxBuildKeyPosition = 0; } current = current.child; } @@ -91,7 +90,6 @@ final class NodeBuilder this.updateFunction = updateFunction; this.comparator = comparator; maxBuildKeyPosition = Math.max(maxBuildKeyPosition, buildKeyPosition); - maxBuildChildPosition = Math.max(maxBuildChildPosition, buildChildPosition); buildKeyPosition = 0; buildChildPosition = 0; copyFromKeyPosition = 0; @@ -114,7 +112,16 @@ final class NodeBuilder int i = find(comparator, key, copyFrom, copyFromKeyPosition, copyFromKeyEnd); boolean found = i >= 0; // exact key match? boolean owns = true; // true iff this node (or a child) should contain the key - if (!found) + if (found) + { + Object prev = copyFrom[i]; + Object next = updateFunction.apply(prev, key); + // we aren't actually replacing anything, so leave our state intact and continue + if (prev == next) + return null; + key = next; + } + else { i = -i - 1; if (i == copyFromKeyEnd && compare(comparator, upperBound, key) <= 0) @@ -123,19 +130,35 @@ final class NodeBuilder if (isLeaf(copyFrom)) { - // copy keys from the original node up to prior to the found index - copyKeys(i); if (owns) { + // copy keys from the original node up to prior to the found index + copyKeys(i); + if (found) + { + // if found, we've applied updateFunction already replaceNextKey(key); + } else + { + // if not found, we need to apply updateFunction still + key = updateFunction.apply(key); addNewKey(key); // handles splitting parent if necessary via ensureRoom + } // done, so return null return null; } + else + { + // we don't want to copy anything if we're ascending and haven't copied anything previously, + // as in this case we can return the original node. Leaving buildKeyPosition as 0 indicates + // to buildFromRange that it should return the original instead of building a new node + if (buildKeyPosition > 0) + copyKeys(i); + } // if we don't own it, all we need to do is ensure we've copied everything in this node // (which we have done, since not owning means pos >= keyEnd), ascend, and let Modifier.update @@ -163,9 +186,10 @@ final class NodeBuilder ensureChild().reset(descendInto, newUpperBound, updateFunction, comparator); return child; } - else + else if (buildKeyPosition > 0 || buildChildPosition > 0) { - // ensure we've copied all keys and children + // ensure we've copied all keys and children, but only if we've already copied something. + // otherwise we want to return the original node copyKeys(copyFromKeyEnd); copyChildren(copyFromKeyEnd + 1); // since we know that there are exactly 1 more child nodes, than keys } @@ -201,7 +225,7 @@ final class NodeBuilder // builds a new root BTree node - must be called on root of operation Object[] toNode() { - assert buildKeyPosition <= FAN_FACTOR && buildKeyPosition > 0 : buildKeyPosition; + assert buildKeyPosition <= FAN_FACTOR && (buildKeyPosition > 0 || copyFrom.length > 0) : buildKeyPosition; return buildFromRange(0, buildKeyPosition, isLeaf(copyFrom), false); } @@ -234,9 +258,12 @@ final class NodeBuilder assert len <= FAN_FACTOR : upToKeyPosition + "," + copyFromKeyPosition; ensureRoom(buildKeyPosition + len); - System.arraycopy(copyFrom, copyFromKeyPosition, buildKeys, buildKeyPosition, len); - copyFromKeyPosition = upToKeyPosition; - buildKeyPosition += len; + if (len > 0) + { + System.arraycopy(copyFrom, copyFromKeyPosition, buildKeys, buildKeyPosition, len); + copyFromKeyPosition = upToKeyPosition; + buildKeyPosition += len; + } } // skips the next key in copyf, and puts the provided key in the builder instead @@ -244,8 +271,6 @@ final class NodeBuilder { // (this first part differs from addNewKey in that we pass the replaced object to replaceF as well) ensureRoom(buildKeyPosition + 1); - if (updateFunction != null) - with = updateFunction.apply(copyFrom[copyFromKeyPosition], with); buildKeys[buildKeyPosition++] = with; copyFromKeyPosition++; @@ -255,8 +280,6 @@ final class NodeBuilder void addNewKey(Object key) { ensureRoom(buildKeyPosition + 1); - if (updateFunction != null) - key = updateFunction.apply(key); buildKeys[buildKeyPosition++] = key; } @@ -267,9 +290,12 @@ final class NodeBuilder if (copyFromChildPosition >= upToChildPosition) return; int len = upToChildPosition - copyFromChildPosition; - System.arraycopy(copyFrom, getKeyEnd(copyFrom) + copyFromChildPosition, buildChildren, buildChildPosition, len); - copyFromChildPosition = upToChildPosition; - buildChildPosition += len; + if (len > 0) + { + System.arraycopy(copyFrom, getKeyEnd(copyFrom) + copyFromChildPosition, buildChildren, buildChildPosition, len); + copyFromChildPosition = upToChildPosition; + buildChildPosition += len; + } } // adds a new and unexpected child to the builder - called by children that overflow @@ -305,13 +331,17 @@ final class NodeBuilder { System.arraycopy(buildChildren, size, buildChildren, 0, buildChildPosition - size); buildChildPosition -= size; - maxBuildChildPosition = buildChildren.length; } } // builds and returns a node from the buffered objects in the given range private Object[] buildFromRange(int offset, int keyLength, boolean isLeaf, boolean isExtra) { + // if keyLength is 0, we didn't copy anything from the original, which means we didn't + // modify any of the range owned by it, so can simply return it as is + if (keyLength == 0) + return copyFrom; + Object[] a; if (isLeaf) { @@ -324,14 +354,11 @@ final class NodeBuilder System.arraycopy(buildKeys, offset, a, 0, keyLength); System.arraycopy(buildChildren, offset, a, keyLength, keyLength + 1); } - if (updateFunction != null) - { - if (isExtra) - updateFunction.allocated(ObjectSizes.sizeOfArray(a)); - else if (a.length != copyFrom.length) - updateFunction.allocated(ObjectSizes.sizeOfArray(a) - - (copyFrom.length == 0 ? 0 : ObjectSizes.sizeOfArray(copyFrom))); - } + if (isExtra) + updateFunction.allocated(ObjectSizes.sizeOfArray(a)); + else if (a.length != copyFrom.length) + updateFunction.allocated(ObjectSizes.sizeOfArray(a) - + (copyFrom.length == 0 ? 0 : ObjectSizes.sizeOfArray(copyFrom))); return a; } http://git-wip-us.apache.org/repos/asf/cassandra/blob/5f82aa3b/src/java/org/apache/cassandra/utils/btree/UpdateFunction.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/utils/btree/UpdateFunction.java b/src/java/org/apache/cassandra/utils/btree/UpdateFunction.java index 7746c59..e4062a4 100644 --- a/src/java/org/apache/cassandra/utils/btree/UpdateFunction.java +++ b/src/java/org/apache/cassandra/utils/btree/UpdateFunction.java @@ -45,4 +45,33 @@ public interface UpdateFunction<V> extends Function<V, V> */ void allocated(long heapSize); + public static final class NoOp<V> implements UpdateFunction<V> + { + + private static final NoOp INSTANCE = new NoOp(); + public static <V> NoOp<V> instance() + { + return INSTANCE; + } + + public V apply(V replacing, V update) + { + return update; + } + + public V apply(V update) + { + return update; + } + + public boolean abortEarly() + { + return false; + } + + public void allocated(long heapSize) + { + } + } + } http://git-wip-us.apache.org/repos/asf/cassandra/blob/5f82aa3b/test/long/org/apache/cassandra/utils/LongBTreeTest.java ---------------------------------------------------------------------- diff --git a/test/long/org/apache/cassandra/utils/LongBTreeTest.java b/test/long/org/apache/cassandra/utils/LongBTreeTest.java index 55126ad..d3673fa 100644 --- a/test/long/org/apache/cassandra/utils/LongBTreeTest.java +++ b/test/long/org/apache/cassandra/utils/LongBTreeTest.java @@ -70,13 +70,13 @@ public class LongBTreeTest @Test public void testIndividualInsertsSmallOverlappingRange() throws ExecutionException, InterruptedException { - testInsertions(100000000, 50, 1, 1, true); + testInsertions(10000000, 50, 1, 1, true); } @Test public void testBatchesSmallOverlappingRange() throws ExecutionException, InterruptedException { - testInsertions(100000000, 50, 1, 5, true); + testInsertions(10000000, 50, 1, 5, true); } @Test @@ -277,19 +277,30 @@ public class LongBTreeTest } } - private static <V> void testEqual(String id, Iterator<V> btree, Iterator<V> canon) + private static <V> boolean testEqual(String id, Iterator<V> btree, Iterator<V> canon) { + boolean equal = true; while (btree.hasNext() && canon.hasNext()) { Object i = btree.next(); Object j = canon.next(); if (!i.equals(j)) + { System.out.println(String.format("%s: Expected %d, Got %d", id, j, i)); + equal = false; + } } while (btree.hasNext()) + { System.out.println(String.format("%s: Expected <Nil>, Got %d", id, btree.next())); + equal = false; + } while (canon.hasNext()) + { System.out.println(String.format("%s: Expected %d, Got Nil", id, canon.next())); + equal = false; + } + return equal; } // should only be called on sets that range from 0->N or N->0