Repository: cassandra
Updated Branches:
  refs/heads/trunk f25a765b6 -> 2e59ea8c7


Improve BTree build performance by avoiding data copy

patch by Jay Zhuang; reviewed by Benedict for CASSANDRA-9989


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/2e59ea8c
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/2e59ea8c
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/2e59ea8c

Branch: refs/heads/trunk
Commit: 2e59ea8c7f21cb11b7ce71a5cdf303a8ed453bc0
Parents: f25a765
Author: Jay Zhuang <jay.zhu...@yahoo.com>
Authored: Tue Jun 12 14:10:06 2018 -0700
Committer: Jay Zhuang <jay.zhu...@yahoo.com>
Committed: Thu Aug 30 07:48:10 2018 -0700

----------------------------------------------------------------------
 CHANGES.txt                                     |   1 +
 build.xml                                       |   2 +-
 .../org/apache/cassandra/utils/btree/BTree.java | 129 +++-
 .../cassandra/utils/btree/TreeBuilder.java      |  28 +-
 .../apache/cassandra/utils/LongBTreeTest.java   | 226 ++++---
 .../test/microbench/BTreeBuildBench.java        |   6 +
 .../org/apache/cassandra/utils/BTreeTest.java   | 503 ---------------
 .../apache/cassandra/utils/btree/BTreeTest.java | 608 +++++++++++++++++++
 8 files changed, 860 insertions(+), 643 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/2e59ea8c/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 1e8e91d..5fb84a2 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 4.0
+ * Improve BTree build performance by avoiding data copy (CASSANDRA-9989)
  * Make monotonic read / read repair configurable (CASSANDRA-14635)
  * Refactor CompactionStrategyManager (CASSANDRA-14621)
  * Flush netty client messages immediately by default (CASSANDRA-13651)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2e59ea8c/build.xml
----------------------------------------------------------------------
diff --git a/build.xml b/build.xml
index 5fb9edf..b53c47b 100644
--- a/build.xml
+++ b/build.xml
@@ -102,7 +102,7 @@
 
     <property name="test.timeout" value="240000" />
     <property name="test.long.timeout" value="600000" />
-    <property name="test.burn.timeout" value="600000" />
+    <property name="test.burn.timeout" value="60000000" />
 
     <!-- default for cql tests. Can be override by 
-Dcassandra.test.use_prepared=false -->
     <property name="cassandra.test.use_prepared" value="true" />

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2e59ea8c/src/java/org/apache/cassandra/utils/btree/BTree.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/btree/BTree.java 
b/src/java/org/apache/cassandra/utils/btree/BTree.java
index 0c620bc..6d0af6e 100644
--- a/src/java/org/apache/cassandra/utils/btree/BTree.java
+++ b/src/java/org/apache/cassandra/utils/btree/BTree.java
@@ -34,6 +34,7 @@ import static com.google.common.collect.Iterables.filter;
 import static com.google.common.collect.Iterables.transform;
 import static java.lang.Math.max;
 import static java.lang.Math.min;
+import static java.util.Comparator.naturalOrder;
 
 public class BTree
 {
@@ -57,18 +58,42 @@ public class BTree
 
     // The maximum fan factor used for B-Trees
     static final int FAN_SHIFT;
+
+    // The maximun tree size for certain heigth of tree
+    static final int[] TREE_SIZE;
+
+    // NB we encode Path indexes as Bytes, so this needs to be less than 
Byte.MAX_VALUE / 2
+    static final int FAN_FACTOR;
+
+    static final int MAX_TREE_SIZE = Integer.MAX_VALUE;
+
     static
     {
-        int fanfactor = 32;
-        if (System.getProperty("cassandra.btree.fanfactor") != null)
-            fanfactor = 
Integer.parseInt(System.getProperty("cassandra.btree.fanfactor"));
+        int fanfactor = 
Integer.parseInt(System.getProperty("cassandra.btree.fanfactor", "32"));
+        assert fanfactor >= 2 : "the minimal btree fanfactor is 2";
         int shift = 1;
         while (1 << shift < fanfactor)
             shift += 1;
+
         FAN_SHIFT = shift;
+        FAN_FACTOR = 1 << FAN_SHIFT;
+
+        // For current FAN_FACTOR, calculate the maximum height of the tree we 
could build
+        int maxHeight = 0;
+        for (long maxSize = 0; maxSize < MAX_TREE_SIZE; maxHeight++)
+            // each tree node could have (FAN_FACTOR + 1) children,
+            // plus current node could have FAN_FACTOR number of values
+            maxSize = maxSize * (FAN_FACTOR + 1) + FAN_FACTOR;
+
+        TREE_SIZE = new int[maxHeight];
+
+        TREE_SIZE[0] = FAN_FACTOR;
+        for (int i = 1; i < maxHeight - 1; i++)
+            TREE_SIZE[i] = TREE_SIZE[i - 1] * (FAN_FACTOR + 1) + FAN_FACTOR;
+
+        TREE_SIZE[maxHeight - 1] = MAX_TREE_SIZE;
     }
-    // NB we encode Path indexes as Bytes, so this needs to be less than 
Byte.MAX_VALUE / 2
-    static final int FAN_FACTOR = 1 << FAN_SHIFT;
+
 
     static final int MINIMAL_NODE_SIZE = FAN_FACTOR >> 1;
 
@@ -102,11 +127,6 @@ public class BTree
         return buildInternal(source, source.size(), updateF);
     }
 
-    public static <C, K extends C, V extends C> Object[] build(Iterable<K> 
source, UpdateFunction<K, V> updateF)
-    {
-        return buildInternal(source, -1, updateF);
-    }
-
     /**
      * Creates a BTree containing all of the objects in the provided collection
      *
@@ -121,32 +141,73 @@ public class BTree
         return buildInternal(source, size, updateF);
     }
 
-    /**
-     * As build(), except:
-     * @param size    < 0 if size is unknown
-     */
-    private static <C, K extends C, V extends C> Object[] 
buildInternal(Iterable<K> source, int size, UpdateFunction<K, V> updateF)
+    private static <C, K extends C, V extends C> Object[] 
buildLeaf(Iterator<K> it, int size, UpdateFunction<K, V> updateF)
+    {
+        V[] values = (V[]) new Object[size | 1];
+
+        for (int i = 0; i < size; i++)
+        {
+            K k = it.next();
+            values[i] = updateF.apply(k);
+        }
+        if (updateF != UpdateFunction.noOp())
+            updateF.allocated(ObjectSizes.sizeOfArray(values));
+        return values;
+    }
+
+    private static <C, K extends C, V extends C> Object[] 
buildInternal(Iterator<K> it, int size, int level, UpdateFunction<K, V> updateF)
     {
-        if ((size >= 0) & (size < FAN_FACTOR))
+        assert size > 0;
+        assert level >= 0;
+        if (level == 0)
+            return buildLeaf(it, size, updateF);
+
+        // calcuate child num: (size - (childNum - 1)) / maxChildSize <= 
childNum
+        int childNum = size / (TREE_SIZE[level - 1] + 1) + 1;
+
+        V[] values = (V[]) new Object[childNum * 2];
+        if (updateF != UpdateFunction.noOp())
+            updateF.allocated(ObjectSizes.sizeOfArray(values));
+
+        int[] indexOffsets = new int[childNum];
+        int childPos = childNum - 1;
+
+        int index = 0;
+        for (int i = 0; i < childNum - 1; i++)
         {
-            if (size == 0)
-                return EMPTY_LEAF;
-            // pad to odd length to match contract that all leaf nodes are odd
-            V[] values = (V[]) new Object[size | 1];
-            {
-                int i = 0;
-                for (K k : source)
-                    values[i++] = updateF.apply(k);
-            }
-            if (updateF != UpdateFunction.noOp())
-                updateF.allocated(ObjectSizes.sizeOfArray(values));
-            return values;
+            // Calculate the next childSize by splitting the remaining values 
to the remaining child nodes.
+            // The performance could be improved by pre-compute the childSize 
(see #9989 comments).
+            int childSize = (size - index) / (childNum - i);
+            // Build the tree with inorder traversal
+            values[childPos + i] = (V) buildInternal(it, childSize, level - 1, 
updateF);
+            index += childSize;
+            indexOffsets[i] = index;
+
+            K k = it.next();
+            values[i] = updateF.apply(k);
+            index++;
         }
 
-        TreeBuilder builder = TreeBuilder.newInstance();
-        Object[] btree = builder.build(source, updateF, size);
+        values[childPos + childNum - 1] = (V) buildInternal(it, size - index, 
level - 1, updateF);
+        indexOffsets[childNum - 1] = size;
 
-        return btree;
+        values[childPos + childNum] = (V) indexOffsets;
+
+        return values;
+    }
+
+    private static <C, K extends C, V extends C> Object[] 
buildInternal(Iterable<K> source, int size, UpdateFunction<K, V> updateF)
+    {
+        assert size >= 0;
+        if (size == 0)
+            return EMPTY_LEAF;
+
+        // find out the height of the tree
+        int level = 0;
+        while (size > TREE_SIZE[level])
+            level++;
+        Iterator<K> it = source.iterator();
+        return buildInternal(it, size, level, updateF);
     }
 
     public static <C, K extends C, V extends C> Object[] update(Object[] btree,
@@ -692,7 +753,11 @@ public class BTree
         remainder = filter(transform(remainder, function), (x) -> x != null);
         Iterable<V> build = concat(head, remainder);
 
-        return buildInternal(build, -1, UpdateFunction.<V>noOp());
+        Builder<V> builder = builder((Comparator<? super V>) naturalOrder());
+        builder.auto(false);
+        for (V v : build)
+            builder.add(v);
+        return builder.build();
     }
 
     private static <V> Object[] transformAndFilter(Object[] btree, 
FiltrationTracker<V> function)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2e59ea8c/src/java/org/apache/cassandra/utils/btree/TreeBuilder.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/utils/btree/TreeBuilder.java 
b/src/java/org/apache/cassandra/utils/btree/TreeBuilder.java
index f42de0f..128ff6a 100644
--- a/src/java/org/apache/cassandra/utils/btree/TreeBuilder.java
+++ b/src/java/org/apache/cassandra/utils/btree/TreeBuilder.java
@@ -22,8 +22,6 @@ import java.util.Comparator;
 
 import io.netty.util.Recycler;
 
-import static org.apache.cassandra.utils.btree.BTree.EMPTY_LEAF;
-import static org.apache.cassandra.utils.btree.BTree.FAN_SHIFT;
 import static org.apache.cassandra.utils.btree.BTree.POSITIVE_INFINITY;
 
 /**
@@ -116,31 +114,7 @@ final class TreeBuilder
         Object[] r = current.toNode();
         current.clear();
 
-        builderRecycler.recycle(this, recycleHandle);
-
-        return r;
-    }
-
-    public <C, K extends C, V extends C> Object[] build(Iterable<K> source, 
UpdateFunction<K, V> updateF, int size)
-    {
-        assert updateF != null;
-
-        NodeBuilder current = rootBuilder;
-        // we descend only to avoid wasting memory; in update() we will often 
descend into existing trees
-        // so here we want to descend also, so we don't have lg max(N) depth 
in both directions
-        while ((size >>= FAN_SHIFT) > 0)
-            current = current.ensureChild();
-
-        current.reset(EMPTY_LEAF, POSITIVE_INFINITY, updateF, null);
-        for (K key : source)
-            current.addNewKey(key);
-
-        current = current.ascendToRoot();
-
-        Object[] r = current.toNode();
-        current.clear();
-
-        builderRecycler.recycle(this, recycleHandle);
+        recycleHandle.recycle(this);
 
         return r;
     }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2e59ea8c/test/burn/org/apache/cassandra/utils/LongBTreeTest.java
----------------------------------------------------------------------
diff --git a/test/burn/org/apache/cassandra/utils/LongBTreeTest.java 
b/test/burn/org/apache/cassandra/utils/LongBTreeTest.java
index 5044290..d21f7b3 100644
--- a/test/burn/org/apache/cassandra/utils/LongBTreeTest.java
+++ b/test/burn/org/apache/cassandra/utils/LongBTreeTest.java
@@ -58,9 +58,13 @@ public class LongBTreeTest
 {
 
     private static final boolean DEBUG = false;
-    private static int perThreadTrees = 10000;
+    private static int perThreadTrees = 100;
     private static int minTreeSize = 4;
     private static int maxTreeSize = 10000;
+    private static float generateTreeByUpdateChance = 0.8f;
+    private static float generateTreeByCopyChance = 0.1f;
+    private static float generateTreeByBuilderChance = 0.1f;
+    private static float generateTreeTotalChance = generateTreeByUpdateChance 
+ generateTreeByCopyChance + generateTreeByBuilderChance;
     private static int threads = DEBUG ? 1 : 
Runtime.getRuntime().availableProcessors() * 8;
     private static final MetricRegistry metrics = new MetricRegistry();
     private static final Timer BTREE_TIMER = 
metrics.timer(MetricRegistry.name(BTree.class, "BTREE"));
@@ -80,8 +84,12 @@ public class LongBTreeTest
     public void testSearchIterator() throws InterruptedException
     {
         final int perTreeSelections = 100;
-        testRandomSelection(perThreadTrees, perTreeSelections,
-        (test) -> {
+        testRandomSelection(perThreadTrees, perTreeSelections, 
testSearchIteratorFactory());
+    }
+
+    private BTreeTestFactory testSearchIteratorFactory()
+    {
+        return (test) -> {
             IndexedSearchIterator<Integer, Integer> iter1 = 
test.testAsSet.iterator();
             IndexedSearchIterator<Integer, Integer> iter2 = 
test.testAsList.iterator();
             return (key) ->
@@ -110,45 +118,52 @@ public class LongBTreeTest
                 else
                     Assert.assertNull(iter2.next(key));
             };
-        });
+        };
     }
 
     @Test
     public void testInequalityLookups() throws InterruptedException
     {
         final int perTreeSelections = 2;
-        testRandomSelectionOfSet(perThreadTrees, perTreeSelections,
-                                 (test, canonical) -> {
-                                     if (!canonical.isEmpty() || 
!test.isEmpty())
-                                     {
-                                         
Assert.assertEquals(canonical.isEmpty(), test.isEmpty());
-                                         
Assert.assertEquals(canonical.first(), test.first());
-                                         Assert.assertEquals(canonical.last(), 
test.last());
-                                     }
-                                     return (key) ->
-                                     {
-                                         
Assert.assertEquals(test.ceiling(key), canonical.ceiling(key));
-                                         Assert.assertEquals(test.higher(key), 
canonical.higher(key));
-                                         Assert.assertEquals(test.floor(key), 
canonical.floor(key));
-                                         Assert.assertEquals(test.lower(key), 
canonical.lower(key));
-                                     };
-                                 });
+        testRandomSelectionOfSet(perThreadTrees, perTreeSelections, 
testInequalityLookupsFactory());
+    }
+
+    private BTreeSetTestFactory testInequalityLookupsFactory()
+    {
+        return (test, canonical) -> {
+            if (!canonical.isEmpty() || !test.isEmpty())
+            {
+                Assert.assertEquals(canonical.isEmpty(), test.isEmpty());
+                Assert.assertEquals(canonical.first(), test.first());
+                Assert.assertEquals(canonical.last(), test.last());
+            }
+            return (key) ->
+            {
+                Assert.assertEquals(test.ceiling(key), canonical.ceiling(key));
+                Assert.assertEquals(test.higher(key), canonical.higher(key));
+                Assert.assertEquals(test.floor(key), canonical.floor(key));
+                Assert.assertEquals(test.lower(key), canonical.lower(key));
+            };
+        };
     }
 
     @Test
     public void testListIndexes() throws InterruptedException
     {
-        testRandomSelectionOfList(perThreadTrees, 4,
-                                  (test, canonical, cmp) ->
-                                  (key) ->
-                                  {
-                                      int javaIndex = 
Collections.binarySearch(canonical, key, cmp);
-                                      int btreeIndex = test.indexOf(key);
-                                      Assert.assertEquals(javaIndex, 
btreeIndex);
-                                      if (javaIndex >= 0)
-                                          
Assert.assertEquals(canonical.get(javaIndex), test.get(btreeIndex));
-                                  }
-        );
+        testRandomSelectionOfList(perThreadTrees, 4, testListIndexesFactory());
+    }
+
+    private BTreeListTestFactory testListIndexesFactory()
+    {
+        return (test, canonical, cmp) ->
+                (key) ->
+                {
+                    int javaIndex = Collections.binarySearch(canonical, key, 
cmp);
+                    int btreeIndex = test.indexOf(key);
+                    Assert.assertEquals(javaIndex, btreeIndex);
+                    if (javaIndex >= 0)
+                        Assert.assertEquals(canonical.get(javaIndex), 
test.get(btreeIndex));
+                };
     }
 
     @Test
@@ -264,13 +279,23 @@ public class LongBTreeTest
         void testOne(Integer value);
     }
 
+    private void run(BTreeTestFactory testRun, RandomSelection selection)
+    {
+        TestEachKey testEachKey = testRun.get(selection);
+        for (Integer key : selection.testKeys)
+            testEachKey.testOne(key);
+    }
+
+    private void run(BTreeSetTestFactory testRun, RandomSelection selection)
+    {
+        TestEachKey testEachKey = testRun.get(selection.testAsSet, 
selection.canonicalSet);
+        for (Integer key : selection.testKeys)
+            testEachKey.testOne(key);
+    }
+
     private void testRandomSelection(int perThreadTrees, int 
perTreeSelections, BTreeTestFactory testRun) throws InterruptedException
     {
-        testRandomSelection(perThreadTrees, perTreeSelections, (selection) -> {
-            TestEachKey testEachKey = testRun.get(selection);
-            for (Integer key : selection.testKeys)
-                testEachKey.testOne(key);
-        });
+        testRandomSelection(perThreadTrees, perTreeSelections, 
(RandomSelection selection) -> run(testRun, selection));
     }
 
     private void testRandomSelection(int perThreadTrees, int 
perTreeSelections, Consumer<RandomSelection> testRun) throws 
InterruptedException
@@ -287,29 +312,29 @@ public class LongBTreeTest
         final long totalCount = threads * perThreadTrees * perTreeSelections;
         for (int t = 0 ; t < threads ; t++)
         {
-            Runnable runnable = new Runnable()
+            Runnable runnable = () ->
             {
-                public void run()
+                try
                 {
-                    try
+                    for (int i = 0 ; i < perThreadTrees ; i++)
                     {
-                        for (int i = 0 ; i < perThreadTrees ; i++)
+                        // not easy to usefully log seed, as run tests in 
parallel; need to really pass through to exceptions
+                        long seed = ThreadLocalRandom.current().nextLong();
+                        Random random = new Random(seed);
+                        RandomTree tree = randomTree(minTreeSize, maxTreeSize, 
random);
+                        for (int j = 0 ; j < perTreeSelections ; j++)
                         {
-                            RandomTree tree = randomTree(minTreeSize, 
maxTreeSize);
-                            for (int j = 0 ; j < perTreeSelections ; j++)
-                            {
-                                testRun.accept(tree.select(narrow, 
mixInNotPresentItems, permitReversal));
-                                count.incrementAndGet();
-                            }
+                            testRun.accept(tree.select(narrow, 
mixInNotPresentItems, permitReversal));
+                            count.incrementAndGet();
                         }
                     }
-                    catch (Throwable t)
-                    {
-                        errors.incrementAndGet();
-                        t.printStackTrace();
-                    }
-                    latch.countDown();
                 }
+                catch (Throwable t1)
+                {
+                    errors.incrementAndGet();
+                    t1.printStackTrace();
+                }
+                latch.countDown();
             };
             MODIFY.execute(runnable);
         }
@@ -347,18 +372,19 @@ public class LongBTreeTest
 
     private static class RandomTree
     {
+        final Random random;
         final NavigableSet<Integer> canonical;
         final BTreeSet<Integer> test;
 
-        private RandomTree(NavigableSet<Integer> canonical, BTreeSet<Integer> 
test)
+        private RandomTree(NavigableSet<Integer> canonical, BTreeSet<Integer> 
test, Random random)
         {
             this.canonical = canonical;
             this.test = test;
+            this.random = random;
         }
 
         RandomSelection select(boolean narrow, boolean mixInNotPresentItems, 
boolean permitReversal)
         {
-            ThreadLocalRandom random = ThreadLocalRandom.current();
             NavigableSet<Integer> canonicalSet = this.canonical;
             BTreeSet<Integer> testAsSet = this.test;
             List<Integer> canonicalList = new ArrayList<>(canonicalSet);
@@ -368,7 +394,7 @@ public class LongBTreeTest
             Assert.assertEquals(canonicalList.size(), testAsList.size());
 
             // sometimes select keys first, so we cover full range
-            List<Integer> allKeys = randomKeys(canonical, 
mixInNotPresentItems);
+            List<Integer> allKeys = randomKeys(canonical, 
mixInNotPresentItems, random);
             List<Integer> keys = allKeys;
 
             int narrowCount = random.nextInt(3);
@@ -391,7 +417,7 @@ public class LongBTreeTest
 
                 if (useLb)
                 {
-                    lbKeyIndex = random.nextInt(0, indexRange - 1);
+                    lbKeyIndex = random.nextInt(indexRange - 1);
                     Integer candidate = keys.get(lbKeyIndex);
                     if (useLb = (candidate > lbKey && candidate <= ubKey))
                     {
@@ -404,7 +430,8 @@ public class LongBTreeTest
                 }
                 if (useUb)
                 {
-                    ubKeyIndex = random.nextInt(Math.max(lbKeyIndex, 
keys.size() - indexRange), keys.size() - 1);
+                    int lb = Math.max(lbKeyIndex, keys.size() - indexRange);
+                    ubKeyIndex = random.nextInt(keys.size() - (1 + lb)) + lb;
                     Integer candidate = keys.get(ubKeyIndex);
                     if (useUb = (candidate < ubKey && candidate >= lbKey))
                     {
@@ -468,31 +495,53 @@ public class LongBTreeTest
         }
     }
 
-    private static RandomTree randomTree(int minSize, int maxSize)
+    private static RandomTree randomTree(int minSize, int maxSize, Random 
random)
     {
         // perform most of our tree constructions via update, as this is more 
efficient; since every run uses this
         // we test builder disproportionately more often than if it had its 
own test anyway
-        return ThreadLocalRandom.current().nextFloat() < 0.95 ? 
randomTreeByUpdate(minSize, maxSize)
-                                                              : 
randomTreeByBuilder(minSize, maxSize);
+        int maxIntegerValue = random.nextInt(Integer.MAX_VALUE - 1) + 1;
+        float f = random.nextFloat() / generateTreeTotalChance;
+        f -= generateTreeByUpdateChance;
+        if (f < 0)
+            return randomTreeByUpdate(minSize, maxSize, maxIntegerValue, 
random);
+        f -= generateTreeByCopyChance;
+        if (f < 0)
+            return randomTreeByCopy(minSize, maxSize, maxIntegerValue, random);
+        return randomTreeByBuilder(minSize, maxSize, maxIntegerValue, random);
     }
 
-    private static RandomTree randomTreeByUpdate(int minSize, int maxSize)
+    private static RandomTree randomTreeByCopy(int minSize, int maxSize, int 
maxIntegerValue, Random random)
     {
         assert minSize > 3;
         TreeSet<Integer> canonical = new TreeSet<>();
 
-        ThreadLocalRandom random = ThreadLocalRandom.current();
-        int targetSize = random.nextInt(minSize, maxSize);
-        int maxModificationSize = random.nextInt(2, targetSize);
+        int targetSize = random.nextInt(maxSize - minSize) + minSize;
+        int curSize = 0;
+        while (curSize < targetSize)
+        {
+            Integer next = random.nextInt(maxIntegerValue);
+            if (canonical.add(next))
+                ++curSize;
+        }
+        return new RandomTree(canonical, 
BTreeSet.<Integer>wrap(BTree.build(canonical, UpdateFunction.noOp()), 
naturalOrder()), random);
+    }
+
+    private static RandomTree randomTreeByUpdate(int minSize, int maxSize, int 
maxIntegerValue, Random random)
+    {
+        assert minSize > 3;
+        TreeSet<Integer> canonical = new TreeSet<>();
+
+        int targetSize = random.nextInt(maxSize - minSize) + minSize;
+        int maxModificationSize = random.nextInt(targetSize - 2) + 2;
         Object[] accmumulate = BTree.empty();
         int curSize = 0;
         while (curSize < targetSize)
         {
-            int nextSize = maxModificationSize == 1 ? 1 : random.nextInt(1, 
maxModificationSize);
+            int nextSize = maxModificationSize == 1 ? 1 : 
random.nextInt(maxModificationSize - 1) + 1;
             TreeSet<Integer> build = new TreeSet<>();
             for (int i = 0 ; i < nextSize ; i++)
             {
-                Integer next = random.nextInt();
+                Integer next = random.nextInt(maxIntegerValue);
                 build.add(next);
                 canonical.add(next);
             }
@@ -500,16 +549,15 @@ public class LongBTreeTest
             curSize += nextSize;
             maxModificationSize = Math.min(maxModificationSize, targetSize - 
curSize);
         }
-        return new RandomTree(canonical, BTreeSet.<Integer>wrap(accmumulate, 
naturalOrder()));
+        return new RandomTree(canonical, BTreeSet.<Integer>wrap(accmumulate, 
naturalOrder()), random);
     }
 
-    private static RandomTree randomTreeByBuilder(int minSize, int maxSize)
+    private static RandomTree randomTreeByBuilder(int minSize, int maxSize, 
int maxIntegerValue, Random random)
     {
         assert minSize > 3;
-        ThreadLocalRandom random = ThreadLocalRandom.current();
         BTree.Builder<Integer> builder = BTree.builder(naturalOrder());
 
-        int targetSize = random.nextInt(minSize, maxSize);
+        int targetSize = random.nextInt(maxSize - minSize) + minSize;
         int maxModificationSize = (int) Math.sqrt(targetSize);
 
         TreeSet<Integer> canonical = new TreeSet<>();
@@ -519,7 +567,7 @@ public class LongBTreeTest
         List<Integer> shuffled = new ArrayList<>();
         while (curSize < targetSize)
         {
-            int nextSize = maxModificationSize <= 1 ? 1 : random.nextInt(1, 
maxModificationSize);
+            int nextSize = maxModificationSize <= 1 ? 1 : 
random.nextInt(maxModificationSize - 1) + 1;
 
             // leave a random selection of previous values
             (random.nextBoolean() ? ordered.headSet(random.nextInt()) : 
ordered.tailSet(random.nextInt())).clear();
@@ -527,7 +575,7 @@ public class LongBTreeTest
 
             for (int i = 0 ; i < nextSize ; i++)
             {
-                Integer next = random.nextInt();
+                Integer next = random.nextInt(maxIntegerValue);
                 ordered.add(next);
                 shuffled.add(next);
                 canonical.add(next);
@@ -558,16 +606,15 @@ public class LongBTreeTest
 
         BTreeSet<Integer> btree = BTreeSet.<Integer>wrap(builder.build(), 
naturalOrder());
         Assert.assertEquals(canonical.size(), btree.size());
-        return new RandomTree(canonical, btree);
+        return new RandomTree(canonical, btree, random);
     }
 
     // select a random subset of the keys, with an optional random population 
of keys inbetween those that are present
     // return a value with the search position
-    private static List<Integer> randomKeys(Iterable<Integer> canonical, 
boolean mixInNotPresentItems)
+    private static List<Integer> randomKeys(Iterable<Integer> canonical, 
boolean mixInNotPresentItems, Random random)
     {
-        ThreadLocalRandom rnd = ThreadLocalRandom.current();
-        boolean useFake = mixInNotPresentItems && rnd.nextBoolean();
-        final float fakeRatio = rnd.nextFloat();
+        boolean useFake = mixInNotPresentItems && random.nextBoolean();
+        final float fakeRatio = random.nextFloat();
         List<Integer> results = new ArrayList<>();
         Long fakeLb = (long) Integer.MIN_VALUE, fakeUb = null;
         Integer max = null;
@@ -575,7 +622,7 @@ public class LongBTreeTest
         {
             if (    !useFake
                 ||  (fakeUb == null ? v - 1 : fakeUb) <= fakeLb + 1
-                ||  rnd.nextFloat() < fakeRatio)
+                ||  random.nextFloat() < fakeRatio)
             {
                 // if we cannot safely construct a fake value, or our 
randomizer says not to, we emit the next real value
                 results.add(v);
@@ -597,13 +644,32 @@ public class LongBTreeTest
         }
         if (useFake && max != null && max < Integer.MAX_VALUE)
             results.add(max + 1);
-        final float useChance = rnd.nextFloat();
-        return Lists.newArrayList(filter(results, (x) -> rnd.nextFloat() < 
useChance));
+        final float useChance = random.nextFloat();
+        return Lists.newArrayList(filter(results, (x) -> random.nextFloat() < 
useChance));
     }
 
     /************************** TEST MUTATION 
********************************************/
 
     @Test
+    public void testBuildNewTree()
+    {
+        int max = 10000;
+        final List<Integer> list = new ArrayList<>(max);
+        final NavigableSet<Integer> set = new TreeSet<>();
+        BTreeSetTestFactory test = testInequalityLookupsFactory();
+        for (int i = 0 ; i < max ; ++i)
+        {
+            list.add(i);
+            set.add(i);
+            Object[] tree = BTree.build(list, UpdateFunction.noOp());
+            Assert.assertTrue(BTree.isWellFormed(tree, 
Comparator.naturalOrder()));
+            BTreeSet<Integer> btree = new BTreeSet<>(tree, 
Comparator.naturalOrder());
+            RandomSelection selection = new RandomSelection(list, set, btree, 
list, btree, Comparator.naturalOrder());
+            run(test, selection);
+        }
+    }
+
+    @Test
     public void testOversizedMiddleInsert()
     {
         TreeSet<Integer> canon = new TreeSet<>();

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2e59ea8c/test/microbench/org/apache/cassandra/test/microbench/BTreeBuildBench.java
----------------------------------------------------------------------
diff --git 
a/test/microbench/org/apache/cassandra/test/microbench/BTreeBuildBench.java 
b/test/microbench/org/apache/cassandra/test/microbench/BTreeBuildBench.java
index 0d89ebb..f9ab004 100644
--- a/test/microbench/org/apache/cassandra/test/microbench/BTreeBuildBench.java
+++ b/test/microbench/org/apache/cassandra/test/microbench/BTreeBuildBench.java
@@ -93,4 +93,10 @@ public class BTreeBuildBench
         Object[] btree = builder.build();
         return BTree.size(btree);
     }
+
+    @Benchmark
+    public int buildTreeTest()
+    {
+        return buildTree(data);
+    }
 }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2e59ea8c/test/unit/org/apache/cassandra/utils/BTreeTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/utils/BTreeTest.java 
b/test/unit/org/apache/cassandra/utils/BTreeTest.java
deleted file mode 100644
index 4f120e4..0000000
--- a/test/unit/org/apache/cassandra/utils/BTreeTest.java
+++ /dev/null
@@ -1,503 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.cassandra.utils;
-
-import java.util.*;
-import java.util.concurrent.ThreadLocalRandom;
-
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Lists;
-import org.junit.Test;
-
-import org.junit.Assert;
-import org.apache.cassandra.utils.btree.BTree;
-import org.apache.cassandra.utils.btree.UpdateFunction;
-
-import static org.junit.Assert.*;
-
-public class BTreeTest
-{
-    static Integer[] ints = new Integer[20];
-    static
-    {
-        System.setProperty("cassandra.btree.fanfactor", "4");
-        for (int i = 0 ; i < ints.length ; i++)
-            ints[i] = new Integer(i);
-    }
-
-    static final UpdateFunction<Integer, Integer> updateF = new 
UpdateFunction<Integer, Integer>()
-    {
-        public Integer apply(Integer replacing, Integer update)
-        {
-            return ints[update];
-        }
-
-        public boolean abortEarly()
-        {
-            return false;
-        }
-
-        public void allocated(long heapSize)
-        {
-
-        }
-
-        public Integer apply(Integer integer)
-        {
-            return ints[integer];
-        }
-    };
-
-    private static final UpdateFunction<Integer, Integer> noOp = new 
UpdateFunction<Integer, Integer>()
-    {
-        public Integer apply(Integer replacing, Integer update)
-        {
-            return update;
-        }
-
-        public boolean abortEarly()
-        {
-            return false;
-        }
-
-        public void allocated(long heapSize)
-        {
-        }
-
-        public Integer apply(Integer k)
-        {
-            return k;
-        }
-    };
-
-    private static List<Integer> seq(int count)
-    {
-        List<Integer> r = new ArrayList<>();
-        for (int i = 0 ; i < count ; i++)
-            r.add(i);
-        return r;
-    }
-
-    private static List<Integer> rand(int count)
-    {
-        Random rand = ThreadLocalRandom.current();
-        List<Integer> r = seq(count);
-        for (int i = 0 ; i < count - 1 ; i++)
-        {
-            int swap = i + rand.nextInt(count - i);
-            Integer tmp = r.get(i);
-            r.set(i, r.get(swap));
-            r.set(swap, tmp);
-        }
-        return r;
-    }
-
-    private static final Comparator<Integer> CMP = new Comparator<Integer>()
-    {
-        public int compare(Integer o1, Integer o2)
-        {
-            return Integer.compare(o1, o2);
-        }
-    };
-
-    @Test
-    public void testBuilding_UpdateFunctionReplacement()
-    {
-        for (int i = 0; i < 20 ; i++)
-            checkResult(i, BTree.build(seq(i), updateF));
-    }
-
-    @Test
-    public void testUpdate_UpdateFunctionReplacement()
-    {
-        for (int i = 0; i < 20 ; i++)
-            checkResult(i, BTree.update(BTree.build(seq(i), noOp), CMP, 
seq(i), updateF));
-    }
-
-    @Test
-    public void testApplyForwards()
-    {
-        List<Integer> input = seq(71);
-        Object[] btree = BTree.build(input, noOp);
-
-        final List<Integer> result = new ArrayList<>();
-        BTree.<Integer>apply(btree, i -> result.add(i), false);
-
-        org.junit.Assert.assertArrayEquals(input.toArray(),result.toArray());
-    }
-
-    @Test
-    public void testApplyReverse()
-    {
-        List<Integer> input = seq(71);
-        Object[] btree = BTree.build(input, noOp);
-
-        final List<Integer> result = new ArrayList<>();
-        BTree.<Integer>apply(btree, i -> result.add(i), true);
-
-        
org.junit.Assert.assertArrayEquals(Lists.reverse(input).toArray(),result.toArray());
-    }
-
-    /**
-     * Tests that the apply method of the <code>UpdateFunction</code> is only 
called once with each key update.
-     * (see CASSANDRA-8018).
-     */
-    @Test
-    public void testUpdate_UpdateFunctionCallBack()
-    {
-        Object[] btree = new Object[1];
-        CallsMonitor monitor = new CallsMonitor();
-
-        btree = BTree.update(btree, CMP, Arrays.asList(1), monitor);
-        assertArrayEquals(new Object[] {1}, btree);
-        assertEquals(1, monitor.getNumberOfCalls(1));
-
-        monitor.clear();
-        btree = BTree.update(btree, CMP, Arrays.asList(2), monitor);
-        assertArrayEquals(new Object[] {1, 2, null}, btree);
-        assertEquals(1, monitor.getNumberOfCalls(2));
-
-        // with existing value
-        monitor.clear();
-        btree = BTree.update(btree, CMP, Arrays.asList(1), monitor);
-        assertArrayEquals(new Object[] {1, 2, null}, btree);
-        assertEquals(1, monitor.getNumberOfCalls(1));
-
-        // with two non-existing values
-        monitor.clear();
-        btree = BTree.update(btree, CMP, Arrays.asList(3, 4), monitor);
-        assertArrayEquals(new Object[] {1, 2, 3, 4, null}, btree);
-        assertEquals(1, monitor.getNumberOfCalls(3));
-        assertEquals(1, monitor.getNumberOfCalls(4));
-
-        // with one existing value and one non existing value
-        monitor.clear();
-        btree = BTree.update(btree, CMP, Arrays.asList(2, 5), monitor);
-        assertArrayEquals(new Object[] {3, new Object[]{1, 2, null}, new 
Object[]{4, 5, null},  new int[]{2, 5}}, btree);
-        assertEquals(1, monitor.getNumberOfCalls(2));
-        assertEquals(1, monitor.getNumberOfCalls(5));
-    }
-
-    /**
-     * Tests that the apply method of the <code>UpdateFunction</code> is only 
called once per value with each build call.
-     */
-    @Test
-    public void testBuilding_UpdateFunctionCallBack()
-    {
-        CallsMonitor monitor = new CallsMonitor();
-        Object[] btree = BTree.build(Arrays.asList(1), monitor);
-        assertArrayEquals(new Object[] {1}, btree);
-        assertEquals(1, monitor.getNumberOfCalls(1));
-
-        monitor.clear();
-        btree = BTree.build(Arrays.asList(1, 2), monitor);
-        assertArrayEquals(new Object[] {1, 2, null}, btree);
-        assertEquals(1, monitor.getNumberOfCalls(1));
-        assertEquals(1, monitor.getNumberOfCalls(2));
-
-        monitor.clear();
-        btree = BTree.build(Arrays.asList(1, 2, 3), monitor);
-        assertArrayEquals(new Object[] {1, 2, 3}, btree);
-        assertEquals(1, monitor.getNumberOfCalls(1));
-        assertEquals(1, monitor.getNumberOfCalls(2));
-        assertEquals(1, monitor.getNumberOfCalls(3));
-    }
-
-    /**
-     * Tests that the apply method of the <code>QuickResolver</code> is called 
exactly once per duplicate value
-     */
-    @Test
-    public void testBuilder_QuickResolver()
-    {
-        // for numbers x in 1..N, we repeat x x times, and resolve values to 
their sum,
-        // so that the resulting tree is of square numbers
-        BTree.Builder.QuickResolver<Accumulator> resolver = (a, b) -> new 
Accumulator(a.base, a.sum + b.sum);
-
-        for (int count = 0 ; count < 10 ; count ++)
-        {
-            BTree.Builder<Accumulator> builder;
-            // first check we produce the right output for sorted input
-            List<Accumulator> sorted = resolverInput(count, false);
-            builder = BTree.builder(Comparator.naturalOrder());
-            builder.setQuickResolver(resolver);
-            for (Accumulator i : sorted)
-                builder.add(i);
-            // for sorted input, check non-resolve path works before checking 
resolution path
-            checkResolverOutput(count, builder.build(), BTree.Dir.ASC);
-            builder = BTree.builder(Comparator.naturalOrder());
-            builder.setQuickResolver(resolver);
-            for (int i = 0 ; i < 10 ; i++)
-            {
-                // now do a few runs of randomized inputs
-                for (Accumulator j : resolverInput(count, true))
-                    builder.add(j);
-                checkResolverOutput(count, builder.build(), BTree.Dir.ASC);
-                builder = BTree.builder(Comparator.naturalOrder());
-                builder.setQuickResolver(resolver);
-            }
-            for (List<Accumulator> add : splitResolverInput(count))
-            {
-                if (ThreadLocalRandom.current().nextBoolean())
-                    builder.addAll(add);
-                else
-                    builder.addAll(new TreeSet<>(add));
-            }
-            checkResolverOutput(count, builder.build(), BTree.Dir.ASC);
-        }
-    }
-
-    @Test
-    public void testBuilderReuse()
-    {
-        List<Integer> sorted = seq(20);
-        BTree.Builder<Integer> builder = 
BTree.builder(Comparator.naturalOrder());
-        builder.auto(false);
-        for (int i : sorted)
-            builder.add(i);
-        checkResult(20, builder.build());
-
-        builder.reuse();
-        assertTrue(builder.build() == BTree.empty());
-        for (int i = 0; i < 12; i++)
-            builder.add(sorted.get(i));
-        checkResult(12, builder.build());
-
-        builder.auto(true);
-        builder.reuse(Comparator.reverseOrder());
-        for (int i = 0; i < 12; i++)
-            builder.add(sorted.get(i));
-        checkResult(12, builder.build(), BTree.Dir.DESC);
-
-        builder.reuse();
-        assertTrue(builder.build() == BTree.empty());
-    }
-
-    private static class Accumulator extends Number implements 
Comparable<Accumulator>
-    {
-        final int base;
-        final int sum;
-        private Accumulator(int base, int sum)
-        {
-            this.base = base;
-            this.sum = sum;
-        }
-
-        public int compareTo(Accumulator that) { return Integer.compare(base, 
that.base); }
-        public int intValue() { return sum; }
-        public long longValue() { return sum; }
-        public float floatValue() { return sum; }
-        public double doubleValue() { return sum; }
-    }
-
-    /**
-     * Tests that the apply method of the <code>Resolver</code> is called 
exactly once per unique value
-     */
-    @Test
-    public void testBuilder_ResolverAndReverse()
-    {
-        // for numbers x in 1..N, we repeat x x times, and resolve values to 
their sum,
-        // so that the resulting tree is of square numbers
-        BTree.Builder.Resolver resolver = (array, lb, ub) -> {
-            int sum = 0;
-            for (int i = lb ; i < ub ; i++)
-                sum += ((Accumulator) array[i]).sum;
-            return new Accumulator(((Accumulator) array[lb]).base, sum);
-        };
-
-        for (int count = 0 ; count < 10 ; count ++)
-        {
-            BTree.Builder<Accumulator> builder;
-            // first check we produce the right output for sorted input
-            List<Accumulator> sorted = resolverInput(count, false);
-            builder = BTree.builder(Comparator.naturalOrder());
-            builder.auto(false);
-            for (Accumulator i : sorted)
-                builder.add(i);
-            // for sorted input, check non-resolve path works before checking 
resolution path
-            Assert.assertTrue(Iterables.elementsEqual(sorted, 
BTree.iterable(builder.build())));
-
-            builder = BTree.builder(Comparator.naturalOrder());
-            builder.auto(false);
-            for (Accumulator i : sorted)
-                builder.add(i);
-            // check resolution path
-            checkResolverOutput(count, builder.resolve(resolver).build(), 
BTree.Dir.ASC);
-
-            builder = BTree.builder(Comparator.naturalOrder());
-            builder.auto(false);
-            for (int i = 0 ; i < 10 ; i++)
-            {
-                // now do a few runs of randomized inputs
-                for (Accumulator j : resolverInput(count, true))
-                    builder.add(j);
-                checkResolverOutput(count, 
builder.sort().resolve(resolver).build(), BTree.Dir.ASC);
-                builder = BTree.builder(Comparator.naturalOrder());
-                builder.auto(false);
-                for (Accumulator j : resolverInput(count, true))
-                    builder.add(j);
-                checkResolverOutput(count, 
builder.sort().reverse().resolve(resolver).build(), BTree.Dir.DESC);
-                builder = BTree.builder(Comparator.naturalOrder());
-                builder.auto(false);
-            }
-        }
-    }
-
-    private static List<Accumulator> resolverInput(int count, boolean shuffled)
-    {
-        List<Accumulator> result = new ArrayList<>();
-        for (int i = 1 ; i <= count ; i++)
-            for (int j = 0 ; j < i ; j++)
-                result.add(new Accumulator(i, i));
-        if (shuffled)
-        {
-            ThreadLocalRandom random = ThreadLocalRandom.current();
-            for (int i = 0 ; i < result.size() ; i++)
-            {
-                int swapWith = random.nextInt(i, result.size());
-                Accumulator t = result.get(swapWith);
-                result.set(swapWith, result.get(i));
-                result.set(i, t);
-            }
-        }
-        return result;
-    }
-
-    private static List<List<Accumulator>> splitResolverInput(int count)
-    {
-        List<Accumulator> all = resolverInput(count, false);
-        List<List<Accumulator>> result = new ArrayList<>();
-        while (!all.isEmpty())
-        {
-            List<Accumulator> is = new ArrayList<>();
-            int prev = -1;
-            for (Accumulator i : new ArrayList<>(all))
-            {
-                if (i.base == prev)
-                    continue;
-                is.add(i);
-                all.remove(i);
-                prev = i.base;
-            }
-            result.add(is);
-        }
-        return result;
-    }
-
-    private static void checkResolverOutput(int count, Object[] btree, 
BTree.Dir dir)
-    {
-        int i = 1;
-        for (Accumulator current : BTree.<Accumulator>iterable(btree, dir))
-        {
-            Assert.assertEquals(i * i, current.sum);
-            i++;
-        }
-        Assert.assertEquals(i, count + 1);
-    }
-
-    private static void checkResult(int count, Object[] btree)
-    {
-        checkResult(count, btree, BTree.Dir.ASC);
-    }
-
-    private static void checkResult(int count, Object[] btree, BTree.Dir dir)
-    {
-        Iterator<Integer> iter = BTree.slice(btree, CMP, dir);
-        int i = 0;
-        while (iter.hasNext())
-            assertEquals(iter.next(), ints[i++]);
-        assertEquals(count, i);
-    }
-
-    @Test
-    public void testClearOnAbort()
-    {
-        Object[] btree = BTree.build(seq(2), noOp);
-        Object[] copy = Arrays.copyOf(btree, btree.length);
-        BTree.update(btree, CMP, seq(94), new AbortAfterX(90));
-
-        assertArrayEquals(copy, btree);
-
-        btree = BTree.update(btree, CMP, seq(94), noOp);
-        assertTrue(BTree.isWellFormed(btree, CMP));
-    }
-
-    private static final class AbortAfterX implements UpdateFunction<Integer, 
Integer>
-    {
-        int counter;
-        final int abortAfter;
-        private AbortAfterX(int abortAfter)
-        {
-            this.abortAfter = abortAfter;
-        }
-        public Integer apply(Integer replacing, Integer update)
-        {
-            return update;
-        }
-        public boolean abortEarly()
-        {
-            return counter++ > abortAfter;
-        }
-        public void allocated(long heapSize)
-        {
-        }
-        public Integer apply(Integer v)
-        {
-            return v;
-        }
-    }
-
-    /**
-     * <code>UpdateFunction</code> that count the number of call made to apply 
for each value.
-     */
-    public static final class CallsMonitor implements UpdateFunction<Integer, 
Integer>
-    {
-        private int[] numberOfCalls = new int[20];
-
-        public Integer apply(Integer replacing, Integer update)
-        {
-            numberOfCalls[update] = numberOfCalls[update] + 1;
-            return update;
-        }
-
-        public boolean abortEarly()
-        {
-            return false;
-        }
-
-        public void allocated(long heapSize)
-        {
-
-        }
-
-        public Integer apply(Integer integer)
-        {
-            numberOfCalls[integer] = numberOfCalls[integer] + 1;
-            return integer;
-        }
-
-        public int getNumberOfCalls(Integer key)
-        {
-            return numberOfCalls[key];
-        }
-
-        public void clear()
-        {
-            Arrays.fill(numberOfCalls, 0);
-        }
-    };
-}

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2e59ea8c/test/unit/org/apache/cassandra/utils/btree/BTreeTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/utils/btree/BTreeTest.java 
b/test/unit/org/apache/cassandra/utils/btree/BTreeTest.java
new file mode 100644
index 0000000..7cc1291
--- /dev/null
+++ b/test/unit/org/apache/cassandra/utils/btree/BTreeTest.java
@@ -0,0 +1,608 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cassandra.utils.btree;
+
+import java.util.*;
+import java.util.concurrent.ThreadLocalRandom;
+
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+import org.junit.Test;
+
+import org.junit.Assert;
+
+import static org.apache.cassandra.utils.btree.BTree.EMPTY_LEAF;
+import static org.apache.cassandra.utils.btree.BTree.FAN_FACTOR;
+import static org.apache.cassandra.utils.btree.BTree.FAN_SHIFT;
+import static org.apache.cassandra.utils.btree.BTree.POSITIVE_INFINITY;
+import static org.junit.Assert.*;
+import static org.junit.Assert.assertEquals;
+
+public class BTreeTest
+{
+    static Integer[] ints = new Integer[20];
+    static
+    {
+        System.setProperty("cassandra.btree.fanfactor", "4");
+        for (int i = 0 ; i < ints.length ; i++)
+            ints[i] = new Integer(i);
+    }
+
+    static final UpdateFunction<Integer, Integer> updateF = new 
UpdateFunction<Integer, Integer>()
+    {
+        public Integer apply(Integer replacing, Integer update)
+        {
+            return ints[update];
+        }
+
+        public boolean abortEarly()
+        {
+            return false;
+        }
+
+        public void allocated(long heapSize)
+        {
+
+        }
+
+        public Integer apply(Integer integer)
+        {
+            return ints[integer];
+        }
+    };
+
+    private static final UpdateFunction<Integer, Integer> noOp = new 
UpdateFunction<Integer, Integer>()
+    {
+        public Integer apply(Integer replacing, Integer update)
+        {
+            return update;
+        }
+
+        public boolean abortEarly()
+        {
+            return false;
+        }
+
+        public void allocated(long heapSize)
+        {
+        }
+
+        public Integer apply(Integer k)
+        {
+            return k;
+        }
+    };
+
+    private static List<Integer> seq(int count)
+    {
+        List<Integer> r = new ArrayList<>();
+        for (int i = 0 ; i < count ; i++)
+            r.add(i);
+        return r;
+    }
+
+    private static List<Integer> rand(int count)
+    {
+        Random rand = ThreadLocalRandom.current();
+        List<Integer> r = seq(count);
+        for (int i = 0 ; i < count - 1 ; i++)
+        {
+            int swap = i + rand.nextInt(count - i);
+            Integer tmp = r.get(i);
+            r.set(i, r.get(swap));
+            r.set(swap, tmp);
+        }
+        return r;
+    }
+
+    private static final Comparator<Integer> CMP = new Comparator<Integer>()
+    {
+        public int compare(Integer o1, Integer o2)
+        {
+            return Integer.compare(o1, o2);
+        }
+    };
+
+    @Test
+    public void testBuilding_UpdateFunctionReplacement()
+    {
+        for (int i = 0; i < 20 ; i++)
+            checkResult(i, BTree.build(seq(i), updateF));
+    }
+
+    @Test
+    public void testUpdate_UpdateFunctionReplacement()
+    {
+        for (int i = 0; i < 20 ; i++)
+            checkResult(i, BTree.update(BTree.build(seq(i), noOp), CMP, 
seq(i), updateF));
+    }
+
+    @Test
+    public void testApplyForwards()
+    {
+        List<Integer> input = seq(71);
+        Object[] btree = BTree.build(input, noOp);
+
+        final List<Integer> result = new ArrayList<>();
+        BTree.<Integer>apply(btree, i -> result.add(i), false);
+
+        org.junit.Assert.assertArrayEquals(input.toArray(),result.toArray());
+    }
+
+    @Test
+    public void testApplyReverse()
+    {
+        List<Integer> input = seq(71);
+        Object[] btree = BTree.build(input, noOp);
+
+        final List<Integer> result = new ArrayList<>();
+        BTree.<Integer>apply(btree, i -> result.add(i), true);
+
+        
org.junit.Assert.assertArrayEquals(Lists.reverse(input).toArray(),result.toArray());
+    }
+
+    /**
+     * Tests that the apply method of the <code>UpdateFunction</code> is only 
called once with each key update.
+     * (see CASSANDRA-8018).
+     */
+    @Test
+    public void testUpdate_UpdateFunctionCallBack()
+    {
+        Object[] btree = new Object[1];
+        CallsMonitor monitor = new CallsMonitor();
+
+        btree = BTree.update(btree, CMP, Arrays.asList(1), monitor);
+        assertArrayEquals(new Object[] {1}, btree);
+        assertEquals(1, monitor.getNumberOfCalls(1));
+
+        monitor.clear();
+        btree = BTree.update(btree, CMP, Arrays.asList(2), monitor);
+        assertArrayEquals(new Object[] {1, 2, null}, btree);
+        assertEquals(1, monitor.getNumberOfCalls(2));
+
+        // with existing value
+        monitor.clear();
+        btree = BTree.update(btree, CMP, Arrays.asList(1), monitor);
+        assertArrayEquals(new Object[] {1, 2, null}, btree);
+        assertEquals(1, monitor.getNumberOfCalls(1));
+
+        // with two non-existing values
+        monitor.clear();
+        btree = BTree.update(btree, CMP, Arrays.asList(3, 4), monitor);
+        assertArrayEquals(new Object[] {1, 2, 3, 4, null}, btree);
+        assertEquals(1, monitor.getNumberOfCalls(3));
+        assertEquals(1, monitor.getNumberOfCalls(4));
+
+        // with one existing value and one non existing value
+        monitor.clear();
+        btree = BTree.update(btree, CMP, Arrays.asList(2, 5), monitor);
+        assertArrayEquals(new Object[] {3, new Object[]{1, 2, null}, new 
Object[]{4, 5, null},  new int[]{2, 5}}, btree);
+        assertEquals(1, monitor.getNumberOfCalls(2));
+        assertEquals(1, monitor.getNumberOfCalls(5));
+    }
+
+    /**
+     * Tests that the apply method of the <code>UpdateFunction</code> is only 
called once per value with each build call.
+     */
+    @Test
+    public void testBuilding_UpdateFunctionCallBack()
+    {
+        CallsMonitor monitor = new CallsMonitor();
+        Object[] btree = BTree.build(Arrays.asList(1), monitor);
+        assertArrayEquals(new Object[] {1}, btree);
+        assertEquals(1, monitor.getNumberOfCalls(1));
+
+        monitor.clear();
+        btree = BTree.build(Arrays.asList(1, 2), monitor);
+        assertArrayEquals(new Object[] {1, 2, null}, btree);
+        assertEquals(1, monitor.getNumberOfCalls(1));
+        assertEquals(1, monitor.getNumberOfCalls(2));
+
+        monitor.clear();
+        btree = BTree.build(Arrays.asList(1, 2, 3), monitor);
+        assertArrayEquals(new Object[] {1, 2, 3}, btree);
+        assertEquals(1, monitor.getNumberOfCalls(1));
+        assertEquals(1, monitor.getNumberOfCalls(2));
+        assertEquals(1, monitor.getNumberOfCalls(3));
+    }
+
+    /**
+     * Tests that the apply method of the <code>QuickResolver</code> is called 
exactly once per duplicate value
+     */
+    @Test
+    public void testBuilder_QuickResolver()
+    {
+        // for numbers x in 1..N, we repeat x x times, and resolve values to 
their sum,
+        // so that the resulting tree is of square numbers
+        BTree.Builder.QuickResolver<Accumulator> resolver = (a, b) -> new 
Accumulator(a.base, a.sum + b.sum);
+
+        for (int count = 0 ; count < 10 ; count ++)
+        {
+            BTree.Builder<Accumulator> builder;
+            // first check we produce the right output for sorted input
+            List<Accumulator> sorted = resolverInput(count, false);
+            builder = BTree.builder(Comparator.naturalOrder());
+            builder.setQuickResolver(resolver);
+            for (Accumulator i : sorted)
+                builder.add(i);
+            // for sorted input, check non-resolve path works before checking 
resolution path
+            checkResolverOutput(count, builder.build(), BTree.Dir.ASC);
+            builder = BTree.builder(Comparator.naturalOrder());
+            builder.setQuickResolver(resolver);
+            for (int i = 0 ; i < 10 ; i++)
+            {
+                // now do a few runs of randomized inputs
+                for (Accumulator j : resolverInput(count, true))
+                    builder.add(j);
+                checkResolverOutput(count, builder.build(), BTree.Dir.ASC);
+                builder = BTree.builder(Comparator.naturalOrder());
+                builder.setQuickResolver(resolver);
+            }
+            for (List<Accumulator> add : splitResolverInput(count))
+            {
+                if (ThreadLocalRandom.current().nextBoolean())
+                    builder.addAll(add);
+                else
+                    builder.addAll(new TreeSet<>(add));
+            }
+            checkResolverOutput(count, builder.build(), BTree.Dir.ASC);
+        }
+    }
+
+    @Test
+    public void testBuilderReuse()
+    {
+        List<Integer> sorted = seq(20);
+        BTree.Builder<Integer> builder = 
BTree.builder(Comparator.naturalOrder());
+        builder.auto(false);
+        for (int i : sorted)
+            builder.add(i);
+        checkResult(20, builder.build());
+
+        builder.reuse();
+        assertTrue(builder.build() == BTree.empty());
+        for (int i = 0; i < 12; i++)
+            builder.add(sorted.get(i));
+        checkResult(12, builder.build());
+
+        builder.auto(true);
+        builder.reuse(Comparator.reverseOrder());
+        for (int i = 0; i < 12; i++)
+            builder.add(sorted.get(i));
+        checkResult(12, builder.build(), BTree.Dir.DESC);
+
+        builder.reuse();
+        assertTrue(builder.build() == BTree.empty());
+    }
+
+    private static class Accumulator extends Number implements 
Comparable<Accumulator>
+    {
+        final int base;
+        final int sum;
+        private Accumulator(int base, int sum)
+        {
+            this.base = base;
+            this.sum = sum;
+        }
+
+        public int compareTo(Accumulator that) { return Integer.compare(base, 
that.base); }
+        public int intValue() { return sum; }
+        public long longValue() { return sum; }
+        public float floatValue() { return sum; }
+        public double doubleValue() { return sum; }
+    }
+
+    /**
+     * Tests that the apply method of the <code>Resolver</code> is called 
exactly once per unique value
+     */
+    @Test
+    public void testBuilder_ResolverAndReverse()
+    {
+        // for numbers x in 1..N, we repeat x x times, and resolve values to 
their sum,
+        // so that the resulting tree is of square numbers
+        BTree.Builder.Resolver resolver = (array, lb, ub) -> {
+            int sum = 0;
+            for (int i = lb ; i < ub ; i++)
+                sum += ((Accumulator) array[i]).sum;
+            return new Accumulator(((Accumulator) array[lb]).base, sum);
+        };
+
+        for (int count = 0 ; count < 10 ; count ++)
+        {
+            BTree.Builder<Accumulator> builder;
+            // first check we produce the right output for sorted input
+            List<Accumulator> sorted = resolverInput(count, false);
+            builder = BTree.builder(Comparator.naturalOrder());
+            builder.auto(false);
+            for (Accumulator i : sorted)
+                builder.add(i);
+            // for sorted input, check non-resolve path works before checking 
resolution path
+            Assert.assertTrue(Iterables.elementsEqual(sorted, 
BTree.iterable(builder.build())));
+
+            builder = BTree.builder(Comparator.naturalOrder());
+            builder.auto(false);
+            for (Accumulator i : sorted)
+                builder.add(i);
+            // check resolution path
+            checkResolverOutput(count, builder.resolve(resolver).build(), 
BTree.Dir.ASC);
+
+            builder = BTree.builder(Comparator.naturalOrder());
+            builder.auto(false);
+            for (int i = 0 ; i < 10 ; i++)
+            {
+                // now do a few runs of randomized inputs
+                for (Accumulator j : resolverInput(count, true))
+                    builder.add(j);
+                checkResolverOutput(count, 
builder.sort().resolve(resolver).build(), BTree.Dir.ASC);
+                builder = BTree.builder(Comparator.naturalOrder());
+                builder.auto(false);
+                for (Accumulator j : resolverInput(count, true))
+                    builder.add(j);
+                checkResolverOutput(count, 
builder.sort().reverse().resolve(resolver).build(), BTree.Dir.DESC);
+                builder = BTree.builder(Comparator.naturalOrder());
+                builder.auto(false);
+            }
+        }
+    }
+
+    private static List<Accumulator> resolverInput(int count, boolean shuffled)
+    {
+        List<Accumulator> result = new ArrayList<>();
+        for (int i = 1 ; i <= count ; i++)
+            for (int j = 0 ; j < i ; j++)
+                result.add(new Accumulator(i, i));
+        if (shuffled)
+        {
+            ThreadLocalRandom random = ThreadLocalRandom.current();
+            for (int i = 0 ; i < result.size() ; i++)
+            {
+                int swapWith = random.nextInt(i, result.size());
+                Accumulator t = result.get(swapWith);
+                result.set(swapWith, result.get(i));
+                result.set(i, t);
+            }
+        }
+        return result;
+    }
+
+    private static List<List<Accumulator>> splitResolverInput(int count)
+    {
+        List<Accumulator> all = resolverInput(count, false);
+        List<List<Accumulator>> result = new ArrayList<>();
+        while (!all.isEmpty())
+        {
+            List<Accumulator> is = new ArrayList<>();
+            int prev = -1;
+            for (Accumulator i : new ArrayList<>(all))
+            {
+                if (i.base == prev)
+                    continue;
+                is.add(i);
+                all.remove(i);
+                prev = i.base;
+            }
+            result.add(is);
+        }
+        return result;
+    }
+
+    private static void checkResolverOutput(int count, Object[] btree, 
BTree.Dir dir)
+    {
+        int i = 1;
+        for (Accumulator current : BTree.<Accumulator>iterable(btree, dir))
+        {
+            Assert.assertEquals(i * i, current.sum);
+            i++;
+        }
+        Assert.assertEquals(i, count + 1);
+    }
+
+    private static void checkResult(int count, Object[] btree)
+    {
+        checkResult(count, btree, BTree.Dir.ASC);
+    }
+
+    private static void checkResult(int count, Object[] btree, BTree.Dir dir)
+    {
+        Iterator<Integer> iter = BTree.slice(btree, CMP, dir);
+        int i = 0;
+        while (iter.hasNext())
+            assertEquals(iter.next(), ints[i++]);
+        assertEquals(count, i);
+    }
+
+    @Test
+    public void testClearOnAbort()
+    {
+        Object[] btree = BTree.build(seq(2), noOp);
+        Object[] copy = Arrays.copyOf(btree, btree.length);
+        BTree.update(btree, CMP, seq(94), new AbortAfterX(90));
+
+        assertArrayEquals(copy, btree);
+
+        btree = BTree.update(btree, CMP, seq(94), noOp);
+        assertTrue(BTree.isWellFormed(btree, CMP));
+    }
+
+    private static final class AbortAfterX implements UpdateFunction<Integer, 
Integer>
+    {
+        int counter;
+        final int abortAfter;
+        private AbortAfterX(int abortAfter)
+        {
+            this.abortAfter = abortAfter;
+        }
+        public Integer apply(Integer replacing, Integer update)
+        {
+            return update;
+        }
+        public boolean abortEarly()
+        {
+            return counter++ > abortAfter;
+        }
+        public void allocated(long heapSize)
+        {
+        }
+        public Integer apply(Integer v)
+        {
+            return v;
+        }
+    }
+
+    /**
+     * <code>UpdateFunction</code> that count the number of call made to apply 
for each value.
+     */
+    public static final class CallsMonitor implements UpdateFunction<Integer, 
Integer>
+    {
+        private int[] numberOfCalls = new int[20];
+
+        public Integer apply(Integer replacing, Integer update)
+        {
+            numberOfCalls[update] = numberOfCalls[update] + 1;
+            return update;
+        }
+
+        public boolean abortEarly()
+        {
+            return false;
+        }
+
+        public void allocated(long heapSize)
+        {
+
+        }
+
+        public Integer apply(Integer integer)
+        {
+            numberOfCalls[integer] = numberOfCalls[integer] + 1;
+            return integer;
+        }
+
+        public int getNumberOfCalls(Integer key)
+        {
+            return numberOfCalls[key];
+        }
+
+        public void clear()
+        {
+            Arrays.fill(numberOfCalls, 0);
+        }
+    }
+
+    @Test
+    public void testTransformAndFilter()
+    {
+        List<Integer> r = seq(100);
+
+        Object[] b1 = BTree.build(r, UpdateFunction.noOp());
+
+        // replace all values
+        Object[] b2 = BTree.transformAndFilter(b1, (x) -> (Integer) x * 2);
+        assertEquals(BTree.size(b1), BTree.size(b2));
+
+        // remove odd numbers
+        Object[] b3 = BTree.transformAndFilter(b1, (x) -> (Integer) x % 2 == 1 
? x : null);
+        assertEquals(BTree.size(b1) / 2, BTree.size(b3));
+
+        // remove all values
+        Object[] b4 = BTree.transformAndFilter(b1, (x) -> null);
+        assertEquals(0, BTree.size(b4));
+    }
+
+    private <C, K extends C, V extends C> Object[] 
buildBTreeLegacy(Iterable<K> source, UpdateFunction<K, V> updateF, int size)
+    {
+        assert updateF != null;
+        NodeBuilder current = new NodeBuilder();
+
+        while ((size >>= FAN_SHIFT) > 0)
+            current = current.ensureChild();
+
+        current.reset(EMPTY_LEAF, POSITIVE_INFINITY, updateF, null);
+        for (K key : source)
+            current.addNewKey(key);
+
+        current = current.ascendToRoot();
+
+        Object[] r = current.toNode();
+        current.clear();
+        return r;
+    }
+
+    // Basic BTree validation to check the values and sizeOffsets. Return tree 
size.
+    private int validateBTree(Object[] tree, int[] startingPos, boolean isRoot)
+    {
+        if (BTree.isLeaf(tree))
+        {
+            int size = BTree.size(tree);
+            if (!isRoot)
+            {
+                assertTrue(size >= FAN_FACTOR / 2);
+                assertTrue(size <= FAN_FACTOR);
+            }
+            for (int i = 0; i < size; i++)
+            {
+                assertEquals((int)tree[i], startingPos[0]);
+                startingPos[0]++;
+            }
+            return size;
+        }
+
+        int childNum = BTree.getChildCount(tree);
+        assertTrue(childNum >= FAN_FACTOR / 2);
+        assertTrue(childNum <= FAN_FACTOR + 1);
+
+        int childStart = BTree.getChildStart(tree);
+        int[] sizeOffsets = BTree.getSizeMap(tree);
+        int pos = 0;
+        for (int i = 0; i < childNum; i++)
+        {
+            int childSize = validateBTree((Object[])tree[i + childStart], 
startingPos, false);
+
+            pos += childSize;
+            assertEquals(sizeOffsets[i], pos);
+            if (i != childNum - 1)
+            {
+                assertEquals((int)tree[i], startingPos[0]);
+                pos++;
+                startingPos[0]++;
+            }
+
+        }
+        return BTree.size(tree);
+    }
+
+    @Test
+    public void testBuildTree()
+    {
+        int maxCount = 1000;
+
+        for (int count = 0; count < maxCount; count++)
+        {
+            List<Integer> r = seq(count);
+            Object[] b1 = BTree.build(r, UpdateFunction.noOp());
+            Object[] b2 = buildBTreeLegacy(r, UpdateFunction.noOp(), count);
+            assertTrue(BTree.equals(b1, b2));
+
+            int[] startingPos = new int[1];
+            startingPos[0] = 0;
+            assertEquals(count, validateBTree(b1, startingPos, true));
+            startingPos[0] = 0;
+            assertEquals(count, validateBTree(b2, startingPos, true));
+        }
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org
For additional commands, e-mail: commits-h...@cassandra.apache.org

Reply via email to