http://git-wip-us.apache.org/repos/asf/cassandra/blob/5250d7ff/src/java/org/apache/cassandra/utils/btree/BTreeSet.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/utils/btree/BTreeSet.java b/src/java/org/apache/cassandra/utils/btree/BTreeSet.java new file mode 100644 index 0000000..d1271fc --- /dev/null +++ b/src/java/org/apache/cassandra/utils/btree/BTreeSet.java @@ -0,0 +1,642 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.cassandra.utils.btree; + +import java.util.*; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; +import com.google.common.collect.Ordering; + +import static org.apache.cassandra.utils.btree.BTree.findIndex; +import static org.apache.cassandra.utils.btree.BTree.lower; +import static org.apache.cassandra.utils.btree.BTree.toArray; + +public class BTreeSet<V> implements NavigableSet<V>, List<V> +{ + protected final Comparator<? super V> comparator; + protected final Object[] tree; + + public BTreeSet(Object[] tree, Comparator<? super V> comparator) + { + this.tree = tree; + this.comparator = comparator; + } + + public BTreeSet<V> update(Collection<V> updateWith) + { + return new BTreeSet<>(BTree.update(tree, comparator, updateWith, UpdateFunction.<V>noOp()), comparator); + } + + @Override + public Comparator<? super V> comparator() + { + return comparator; + } + + protected BTreeSearchIterator<V, V> slice(boolean forwards) + { + return BTree.slice(tree, comparator, forwards); + } + + public Object[] tree() + { + return tree; + } + + /** + * The index of the item within the list, or its insertion point otherwise. i.e. binarySearch semantics + */ + public int indexOf(Object item) + { + return findIndex(tree, comparator, (V) item); + } + + /** + * The converse of indexOf: provided an index between 0 and size, returns the i'th item, in set order. + */ + public V get(int index) + { + return BTree.<V>findByIndex(tree, index); + } + + public int lastIndexOf(Object o) + { + return indexOf(o); + } + + public BTreeSet<V> subList(int fromIndex, int toIndex) + { + return new BTreeRange<V>(tree, comparator, fromIndex, toIndex - 1); + } + + @Override + public int size() + { + return BTree.size(tree); + } + + @Override + public boolean isEmpty() + { + return BTree.isEmpty(tree); + } + + @Override + public BTreeSearchIterator<V, V> iterator() + { + return slice(true); + } + + @Override + public BTreeSearchIterator<V, V> descendingIterator() + { + return slice(false); + } + + @Override + public Object[] toArray() + { + return toArray(new Object[0]); + } + + @Override + public <T> T[] toArray(T[] a) + { + return toArray(a, 0); + } + + public <T> T[] toArray(T[] a, int offset) + { + int size = size(); + if (a.length < size + offset) + a = Arrays.copyOf(a, size); + BTree.toArray(tree, a, offset); + return a; + } + + public Spliterator<V> spliterator() + { + return Spliterators.spliterator(this, Spliterator.ORDERED | Spliterator.DISTINCT | Spliterator.IMMUTABLE | Spliterator.NONNULL | Spliterator.SIZED); + } + + @Override + public BTreeSet<V> subSet(V fromElement, boolean fromInclusive, V toElement, boolean toInclusive) + { + return new BTreeRange<>(tree, comparator, fromElement, fromInclusive, toElement, toInclusive); + } + + @Override + public BTreeSet<V> headSet(V toElement, boolean inclusive) + { + return new BTreeRange<>(tree, comparator, null, true, toElement, inclusive); + } + + @Override + public BTreeSet<V> tailSet(V fromElement, boolean inclusive) + { + return new BTreeRange<>(tree, comparator, fromElement, inclusive, null, true); + } + + @Override + public SortedSet<V> subSet(V fromElement, V toElement) + { + return subSet(fromElement, true, toElement, false); + } + + @Override + public SortedSet<V> headSet(V toElement) + { + return headSet(toElement, false); + } + + @Override + public SortedSet<V> tailSet(V fromElement) + { + return tailSet(fromElement, true); + } + + @Override + public BTreeSet<V> descendingSet() + { + return new BTreeRange<>(this.tree, this.comparator).descendingSet(); + } + + @Override + public V first() + { + return get(0); + } + + @Override + public V last() + { + return get(size() - 1); + } + + @Override + public V lower(V v) + { + return BTree.lower(tree, comparator, v); + } + + @Override + public V floor(V v) + { + return BTree.floor(tree, comparator, v); + } + + @Override + public V ceiling(V v) + { + return BTree.ceil(tree, comparator, v); + } + + @Override + public V higher(V v) + { + return BTree.higher(tree, comparator, v); + } + + @Override + public boolean contains(Object o) + { + return indexOf((V) o) >= 0; + } + + @Override + public boolean containsAll(Collection<?> c) + { + // TODO: if we ever use this method, it can be specialized quite easily for SortedSet arguments + for (Object o : c) + if (!contains(o)) + return false; + return true; + } + + public int hashCode() + { + // we can't just delegate to Arrays.deepHashCode(), + // because two equivalent sets may be represented by differently shaped trees + int result = 1; + for (V v : this) + result = 31 * result + Objects.hashCode(v); + return result; + } + + @Override + public boolean addAll(Collection<? extends V> c) + { + throw new UnsupportedOperationException(); + } + + public boolean addAll(int index, Collection<? extends V> c) + { + throw new UnsupportedOperationException(); + } + + @Override + public boolean retainAll(Collection<?> c) + { + throw new UnsupportedOperationException(); + } + + @Override + public boolean removeAll(Collection<?> c) + { + throw new UnsupportedOperationException(); + } + + @Override + public void clear() + { + throw new UnsupportedOperationException(); + } + + @Override + public V pollFirst() + { + throw new UnsupportedOperationException(); + } + + @Override + public V pollLast() + { + throw new UnsupportedOperationException(); + } + + @Override + public boolean add(V v) + { + throw new UnsupportedOperationException(); + } + + @Override + public boolean remove(Object o) + { + throw new UnsupportedOperationException(); + } + + public V set(int index, V element) + { + throw new UnsupportedOperationException(); + } + + public void add(int index, V element) + { + throw new UnsupportedOperationException(); + } + + public V remove(int index) + { + throw new UnsupportedOperationException(); + } + + public ListIterator<V> listIterator() + { + throw new UnsupportedOperationException(); + } + + public ListIterator<V> listIterator(int index) + { + throw new UnsupportedOperationException(); + } + + public static class BTreeRange<V> extends BTreeSet<V> + { + // both inclusive + protected final int lowerBound, upperBound; + BTreeRange(Object[] tree, Comparator<? super V> comparator) + { + this(tree, comparator, null, true, null, true); + } + + BTreeRange(BTreeRange<V> from) + { + super(from.tree, from.comparator); + this.lowerBound = from.lowerBound; + this.upperBound = from.upperBound; + } + + BTreeRange(Object[] tree, Comparator<? super V> comparator, int lowerBound, int upperBound) + { + super(tree, comparator); + if (upperBound < lowerBound - 1) + upperBound = lowerBound - 1; + this.lowerBound = lowerBound; + this.upperBound = upperBound; + } + + BTreeRange(Object[] tree, Comparator<? super V> comparator, V lowerBound, boolean inclusiveLowerBound, V upperBound, boolean inclusiveUpperBound) + { + this(tree, comparator, + lowerBound == null ? 0 : inclusiveLowerBound ? BTree.ceilIndex(tree, comparator, lowerBound) + : BTree.higherIndex(tree, comparator, lowerBound), + upperBound == null ? BTree.size(tree) - 1 : inclusiveUpperBound ? BTree.floorIndex(tree, comparator, upperBound) + : BTree.lowerIndex(tree, comparator, upperBound)); + } + + // narrowing range constructor - makes this the intersection of the two ranges over the same tree b + BTreeRange(BTreeRange<V> a, BTreeRange<V> b) + { + this(a.tree, a.comparator, Math.max(a.lowerBound, b.lowerBound), Math.min(a.upperBound, b.upperBound)); + assert a.tree == b.tree; + } + + @Override + protected BTreeSearchIterator<V, V> slice(boolean forwards) + { + return new BTreeSearchIterator<>(tree, comparator, forwards, lowerBound, upperBound); + } + + @Override + public boolean isEmpty() + { + return upperBound < lowerBound; + } + + public int size() + { + return (upperBound - lowerBound) + 1; + } + + boolean outOfBounds(int i) + { + return (i < lowerBound) | (i > upperBound); + } + + public V get(int index) + { + index += lowerBound; + if (outOfBounds(index)) + throw new NoSuchElementException(); + return super.get(index); + } + + public int indexOf(Object item) + { + int i = super.indexOf(item); + boolean negate = i < 0; + if (negate) + i = -1 - i; + if (outOfBounds(i)) + return i < lowerBound ? -1 : -1 - size(); + i = i - lowerBound; + if (negate) + i = -1 -i; + return i; + } + + public V lower(V v) + { + return maybe(Math.min(upperBound, BTree.lowerIndex(tree, comparator, v))); + } + + public V floor(V v) + { + return maybe(Math.min(upperBound, BTree.floorIndex(tree, comparator, v))); + } + + public V ceiling(V v) + { + return maybe(Math.max(lowerBound, BTree.ceilIndex(tree, comparator, v))); + } + + public V higher(V v) + { + return maybe(Math.max(lowerBound, BTree.higherIndex(tree, comparator, v))); + } + + private V maybe(int i) + { + if (outOfBounds(i)) + return null; + return super.get(i); + } + + @Override + public BTreeSet<V> subSet(V fromElement, boolean fromInclusive, V toElement, boolean toInclusive) + { + return new BTreeRange<>(this, new BTreeRange<>(tree, comparator, fromElement, fromInclusive, toElement, toInclusive)); + } + + @Override + public BTreeSet<V> headSet(V toElement, boolean inclusive) + { + return new BTreeRange<>(this, new BTreeRange<>(tree, comparator, null, true, toElement, inclusive)); + } + + @Override + public BTreeSet<V> tailSet(V fromElement, boolean inclusive) + { + return new BTreeRange<>(this, new BTreeRange<>(tree, comparator, fromElement, inclusive, null, true)); + } + + @Override + public BTreeSet<V> descendingSet() + { + return new BTreeDescRange<>(this); + } + + public BTreeSet<V> subList(int fromIndex, int toIndex) + { + if (fromIndex < 0 || toIndex > size()) + throw new IndexOutOfBoundsException(); + return new BTreeRange<V>(tree, comparator, lowerBound + fromIndex, lowerBound + toIndex - 1); + } + + @Override + public <T> T[] toArray(T[] a) + { + return toArray(a, 0); + } + + public <T> T[] toArray(T[] a, int offset) + { + if (size() + offset < a.length) + a = Arrays.copyOf(a, size() + offset); + + BTree.toArray(tree, lowerBound, upperBound + 1, a, offset); + return a; + } + } + + public static class BTreeDescRange<V> extends BTreeRange<V> + { + BTreeDescRange(BTreeRange<V> from) + { + super(from.tree, from.comparator, from.lowerBound, from.upperBound); + } + + @Override + protected BTreeSearchIterator<V, V> slice(boolean forwards) + { + return super.slice(!forwards); + } + + /* Flip the methods we call for inequality searches */ + + public V higher(V v) + { + return super.lower(v); + } + + public V ceiling(V v) + { + return super.floor(v); + } + + public V floor(V v) + { + return super.ceiling(v); + } + + public V lower(V v) + { + return super.higher(v); + } + + public V get(int index) + { + index = upperBound - index; + if (outOfBounds(index)) + throw new NoSuchElementException(); + return BTree.findByIndex(tree, index); + } + + public int indexOf(Object item) + { + int i = super.indexOf(item); + // i is in range [-1 - size()..size()) + // so we just need to invert by adding/subtracting from size + return i < 0 ? -2 - size() - i : size() - (i + 1); + } + + public BTreeSet<V> subList(int fromIndex, int toIndex) + { + if (fromIndex < 0 || toIndex > size()) + throw new IndexOutOfBoundsException(); + return new BTreeDescRange<V>(new BTreeRange<V>(tree, comparator, upperBound - (toIndex - 1), upperBound - fromIndex)); + } + + @Override + public BTreeSet<V> subSet(V fromElement, boolean fromInclusive, V toElement, boolean toInclusive) + { + return super.subSet(toElement, toInclusive, fromElement, fromInclusive).descendingSet(); + } + + @Override + public BTreeSet<V> headSet(V toElement, boolean inclusive) + { + return super.tailSet(toElement, inclusive).descendingSet(); + } + + @Override + public BTreeSet<V> tailSet(V fromElement, boolean inclusive) + { + return super.headSet(fromElement, inclusive).descendingSet(); + } + + @Override + public BTreeSet<V> descendingSet() + { + return new BTreeRange<>(this); + } + + public Comparator<V> comparator() + { + return (a, b) -> comparator.compare(b, a); + } + + public <T> T[] toArray(T[] a, int offset) + { + a = super.toArray(a, offset); + int count = size(); + int flip = count / 2; + for (int i = 0 ; i < flip ; i++) + { + int j = count - (i + 1); + T t = a[i + offset]; + a[i + offset] = a[j + offset]; + a[j + offset] = t; + } + return a; + } + } + + public static class Builder<V> + { + final BTree.Builder<V> builder; + protected Builder(Comparator<? super V> comparator) + { + builder= BTree.builder(comparator); + } + + public Builder<V> add(V v) + { + builder.add(v); + return this; + } + + public Builder<V> addAll(Collection<V> iter) + { + builder.addAll(iter); + return this; + } + + public boolean isEmpty() + { + return builder.isEmpty(); + } + public BTreeSet<V> build() + { + return new BTreeSet<>(builder.build(), builder.comparator); + } + } + + public static <V> Builder<V> builder(Comparator<? super V> comparator) + { + return new Builder<>(comparator); + } + + public static <V> BTreeSet<V> wrap(Object[] btree, Comparator<V> comparator) + { + return new BTreeSet<>(btree, comparator); + } + + public static <V extends Comparable<V>> BTreeSet<V> of(Collection<V> sortedValues) + { + return new BTreeSet<>(BTree.build(sortedValues, UpdateFunction.<V>noOp()), Ordering.<V>natural()); + } + + public static <V extends Comparable<V>> BTreeSet<V> of(V value) + { + return new BTreeSet<>(BTree.build(ImmutableList.of(value), UpdateFunction.<V>noOp()), Ordering.<V>natural()); + } + + public static <V> BTreeSet<V> empty(Comparator<? super V> comparator) + { + return new BTreeSet<>(BTree.empty(), comparator); + } + + public static <V> BTreeSet<V> of(Comparator<? super V> comparator, V value) + { + return new BTreeSet<>(BTree.build(ImmutableList.of(value), UpdateFunction.<V>noOp()), comparator); + } +}
http://git-wip-us.apache.org/repos/asf/cassandra/blob/5250d7ff/src/java/org/apache/cassandra/utils/btree/Builder.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/utils/btree/Builder.java b/src/java/org/apache/cassandra/utils/btree/Builder.java deleted file mode 100644 index b835554..0000000 --- a/src/java/org/apache/cassandra/utils/btree/Builder.java +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.cassandra.utils.btree; - -import java.util.Comparator; - -import static org.apache.cassandra.utils.btree.BTree.EMPTY_LEAF; -import static org.apache.cassandra.utils.btree.BTree.FAN_SHIFT; -import static org.apache.cassandra.utils.btree.BTree.POSITIVE_INFINITY; - -/** - * A class for constructing a new BTree, either from an existing one and some set of modifications - * or a new tree from a sorted collection of items. - * <p/> - * This is a fairly heavy-weight object, so a ThreadLocal instance is created for making modifications to a tree - */ -final class Builder -{ - private final NodeBuilder rootBuilder = new NodeBuilder(); - - /** - * At the highest level, we adhere to the classic b-tree insertion algorithm: - * - * 1. Add to the appropriate leaf - * 2. Split the leaf if necessary, add the median to the parent - * 3. Split the parent if necessary, etc. - * - * There is one important difference: we don't actually modify the original tree, but copy each node that we - * modify. Note that every node on the path to the key being inserted or updated will be modified; this - * implies that at a minimum, the root node will be modified for every update, so every root is a "snapshot" - * of a tree that can be iterated or sliced without fear of concurrent modifications. - * - * The NodeBuilder class handles the details of buffering the copied contents of the original tree and - * adding in our changes. Since NodeBuilder maintains parent/child references, it also handles parent-splitting - * (easy enough, since any node affected by the split will already be copied into a NodeBuilder). - * - * One other difference from the simple algorithm is that we perform modifications in bulk; - * we assume @param source has been sorted, e.g. by BTree.update, so the update of each key resumes where - * the previous left off. - */ - public <C, K extends C, V extends C> Object[] update(Object[] btree, Comparator<C> comparator, Iterable<K> source, UpdateFunction<K, V> updateF) - { - assert updateF != null; - - NodeBuilder current = rootBuilder; - current.reset(btree, POSITIVE_INFINITY, updateF, comparator); - - for (K key : source) - { - while (true) - { - if (updateF.abortEarly()) - { - rootBuilder.clear(); - return null; - } - NodeBuilder next = current.update(key); - if (next == null) - break; - // we were in a subtree from a previous key that didn't contain this new key; - // retry against the correct subtree - current = next; - } - } - - // finish copying any remaining keys from the original btree - while (true) - { - NodeBuilder next = current.finish(); - if (next == null) - break; - current = next; - } - - // updating with POSITIVE_INFINITY means that current should be back to the root - assert current.isRoot(); - - Object[] r = current.toNode(); - current.clear(); - return r; - } - - public <C, K extends C, V extends C> Object[] build(Iterable<K> source, UpdateFunction<K, V> updateF, int size) - { - assert updateF != null; - - NodeBuilder current = rootBuilder; - // we descend only to avoid wasting memory; in update() we will often descend into existing trees - // so here we want to descend also, so we don't have lg max(N) depth in both directions - while ((size >>= FAN_SHIFT) > 0) - current = current.ensureChild(); - - current.reset(EMPTY_LEAF, POSITIVE_INFINITY, updateF, null); - for (K key : source) - current.addNewKey(updateF.apply(key)); - - current = current.ascendToRoot(); - - Object[] r = current.toNode(); - current.clear(); - return r; - } -} http://git-wip-us.apache.org/repos/asf/cassandra/blob/5250d7ff/src/java/org/apache/cassandra/utils/btree/Cursor.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/utils/btree/Cursor.java b/src/java/org/apache/cassandra/utils/btree/Cursor.java deleted file mode 100644 index 6814d26..0000000 --- a/src/java/org/apache/cassandra/utils/btree/Cursor.java +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.cassandra.utils.btree; - -import java.util.Comparator; -import java.util.Iterator; - -import static org.apache.cassandra.utils.btree.BTree.NEGATIVE_INFINITY; -import static org.apache.cassandra.utils.btree.BTree.POSITIVE_INFINITY; -import static org.apache.cassandra.utils.btree.BTree.getLeafKeyEnd; -import static org.apache.cassandra.utils.btree.BTree.isLeaf; - -/** - * An extension of Path which provides a public interface for iterating over or counting a subrange of the tree - * - * @param <V> - */ -public final class Cursor<K, V extends K> extends Path implements Iterator<V> -{ - /* - * Conceptually, a Cursor derives two Paths, one for the first object in the slice requested (inclusive), - * and one for the last (exclusive). Then hasNext just checks, have we reached the last yet, and next - * calls successor() to get to the next item in the Tree. - * - * To optimize memory use, we summarize the last Path as just endNode/endIndex, and inherit from Path for - * - * the first one. - */ - - // the last node covered by the requested range - private Object[] endNode; - // the index within endNode that signals we're finished -- that is, endNode[endIndex] is NOT part of the Cursor - private byte endIndex; - - private boolean forwards; - - /** - * Reset this cursor for the provided tree, to iterate over its entire range - * - * @param btree the tree to iterate over - * @param forwards if false, the cursor will start at the end and move backwards - */ - public void reset(Object[] btree, boolean forwards) - { - _reset(btree, null, NEGATIVE_INFINITY, false, POSITIVE_INFINITY, false, forwards); - } - - /** - * Reset this cursor for the provided tree, to iterate between the provided start and end - * - * @param btree the tree to iterate over - * @param comparator the comparator that defines the ordering over the items in the tree - * @param lowerBound the first item to include, inclusive - * @param upperBound the last item to include, exclusive - * @param forwards if false, the cursor will start at the end and move backwards - */ - public void reset(Object[] btree, Comparator<K> comparator, K lowerBound, K upperBound, boolean forwards) - { - _reset(btree, comparator, lowerBound, true, upperBound, false, forwards); - } - - /** - * Reset this cursor for the provided tree, to iterate between the provided start and end - * - * @param btree the tree to iterate over - * @param comparator the comparator that defines the ordering over the items in the tree - * @param lowerBound the first item to include - * @param inclusiveLowerBound should include start in the iterator, if present in the tree - * @param upperBound the last item to include - * @param inclusiveUpperBound should include end in the iterator, if present in the tree - * @param forwards if false, the cursor will start at the end and move backwards - */ - public void reset(Object[] btree, Comparator<K> comparator, K lowerBound, boolean inclusiveLowerBound, K upperBound, boolean inclusiveUpperBound, boolean forwards) - { - _reset(btree, comparator, lowerBound, inclusiveLowerBound, upperBound, inclusiveUpperBound, forwards); - } - - private void _reset(Object[] btree, Comparator<K> comparator, Object lowerBound, boolean inclusiveLowerBound, Object upperBound, boolean inclusiveUpperBound, boolean forwards) - { - init(btree); - if (lowerBound == null) - lowerBound = NEGATIVE_INFINITY; - if (upperBound == null) - upperBound = POSITIVE_INFINITY; - - this.forwards = forwards; - - Path findLast = new Path(this.path.length, btree); - if (forwards) - { - findLast.find(comparator, upperBound, inclusiveUpperBound ? Op.HIGHER : Op.CEIL, true); - find(comparator, lowerBound, inclusiveLowerBound ? Op.CEIL : Op.HIGHER, true); - } - else - { - findLast.find(comparator, lowerBound, inclusiveLowerBound ? Op.LOWER : Op.FLOOR, false); - find(comparator, upperBound, inclusiveUpperBound ? Op.FLOOR : Op.LOWER, false); - } - int c = this.compareTo(findLast, forwards); - if (forwards ? c > 0 : c < 0) - { - endNode = currentNode(); - endIndex = currentIndex(); - } - else - { - endNode = findLast.currentNode(); - endIndex = findLast.currentIndex(); - } - } - - public boolean hasNext() - { - return path[depth] != endNode || indexes[depth] != endIndex; - } - - public V next() - { - Object r = currentKey(); - if (forwards) - successor(); - else - predecessor(); - return (V) r; - } - - public int count() - { - if (!forwards) - throw new IllegalStateException("Count can only be run on forward cursors"); - int count = 0; - int next; - while ((next = consumeNextLeaf()) >= 0) - count += next; - return count; - } - - /** - * @return the number of objects consumed by moving out of the next (possibly current) leaf - */ - private int consumeNextLeaf() - { - Object[] node = currentNode(); - int r = 0; - - if (!isLeaf(node)) - { - // if we're not in a leaf, then calling successor once will take us to a leaf, since the next - // key will be in the leftmost subtree of whichever branch is next. For instance, if we - // are in the root node of the tree depicted by http://cis.stvincent.edu/html/tutorials/swd/btree/btree1.gif, - // successor() will take us to the leaf containing N and O. - int i = currentIndex(); - if (node == endNode && i == endIndex) - return -1; - r = 1; - successor(); - node = currentNode(); - } - - if (node == endNode) - { - // only count up to endIndex, and don't call successor() - if (currentIndex() == endIndex) - return r > 0 ? r : -1; - r += endIndex - currentIndex(); - setIndex(endIndex); - return r; - } - - // count the remaining objects in this leaf - int keyEnd = getLeafKeyEnd(node); - r += keyEnd - currentIndex(); - setIndex(keyEnd); - successor(); - return r; - } - - public void remove() - { - throw new UnsupportedOperationException(); - } -} http://git-wip-us.apache.org/repos/asf/cassandra/blob/5250d7ff/src/java/org/apache/cassandra/utils/btree/NodeBuilder.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/utils/btree/NodeBuilder.java b/src/java/org/apache/cassandra/utils/btree/NodeBuilder.java index c715873..83fc661 100644 --- a/src/java/org/apache/cassandra/utils/btree/NodeBuilder.java +++ b/src/java/org/apache/cassandra/utils/btree/NodeBuilder.java @@ -23,12 +23,7 @@ import org.apache.cassandra.utils.ObjectSizes; import java.util.Arrays; import java.util.Comparator; -import static org.apache.cassandra.utils.btree.BTree.EMPTY_BRANCH; -import static org.apache.cassandra.utils.btree.BTree.FAN_FACTOR; -import static org.apache.cassandra.utils.btree.BTree.compare; -import static org.apache.cassandra.utils.btree.BTree.find; -import static org.apache.cassandra.utils.btree.BTree.getKeyEnd; -import static org.apache.cassandra.utils.btree.BTree.isLeaf; +import static org.apache.cassandra.utils.btree.BTree.*; /** * Represents a level / stack item of in progress modifications to a BTree. @@ -151,7 +146,7 @@ final class NodeBuilder } else { - i = find(comparator, key, copyFrom, i + 1, copyFromKeyEnd); + i = Arrays.binarySearch(copyFrom, i + 1, copyFromKeyEnd, key, comparator); found = i >= 0; if (!found) i = -i - 1; @@ -167,7 +162,7 @@ final class NodeBuilder return null; key = next; } - else if (i == copyFromKeyEnd && compare(comparator, key, upperBound) >= 0) + else if (i == copyFromKeyEnd && compareUpperBound(comparator, key, upperBound) >= 0) owns = false; if (isLeaf(copyFrom)) @@ -240,6 +235,10 @@ final class NodeBuilder return ascend(); } + private static <V> int compareUpperBound(Comparator<V> comparator, Object value, Object upperBound) + { + return upperBound == POSITIVE_INFINITY ? -1 : comparator.compare((V)value, (V)upperBound); + } // UTILITY METHODS FOR IMPLEMENTATION OF UPDATE/BUILD/DELETE @@ -264,7 +263,7 @@ final class NodeBuilder // builds a new root BTree node - must be called on root of operation Object[] toNode() { - assert buildKeyPosition <= FAN_FACTOR && (buildKeyPosition > 0 || copyFrom.length > 0) : buildKeyPosition; + assert buildKeyPosition <= FAN_FACTOR && (buildKeyPosition > 0 || getKeyEnd(copyFrom) > 0) : buildKeyPosition; return buildFromRange(0, buildKeyPosition, isLeaf(copyFrom), false); } @@ -384,14 +383,25 @@ final class NodeBuilder Object[] a; if (isLeaf) { - a = new Object[keyLength + (keyLength & 1)]; + a = new Object[keyLength | 1]; System.arraycopy(buildKeys, offset, a, 0, keyLength); } else { - a = new Object[1 + (keyLength * 2)]; + a = new Object[2 + (keyLength * 2)]; System.arraycopy(buildKeys, offset, a, 0, keyLength); System.arraycopy(buildChildren, offset, a, keyLength, keyLength + 1); + + // calculate the indexOffsets of each key in this node, within the sub-tree rooted at this node + int[] indexOffsets = new int[keyLength + 1]; + int size = BTree.size((Object[]) a[keyLength]); + for (int i = 0 ; i < keyLength ; i++) + { + indexOffsets[i] = size; + size += 1 + BTree.size((Object[]) a[keyLength + 1 + i]); + } + indexOffsets[keyLength] = size; + a[a.length - 1] = indexOffsets; } if (isExtra) updateFunction.allocated(ObjectSizes.sizeOfArray(a)); http://git-wip-us.apache.org/repos/asf/cassandra/blob/5250d7ff/src/java/org/apache/cassandra/utils/btree/NodeCursor.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/utils/btree/NodeCursor.java b/src/java/org/apache/cassandra/utils/btree/NodeCursor.java new file mode 100644 index 0000000..e9fa89e --- /dev/null +++ b/src/java/org/apache/cassandra/utils/btree/NodeCursor.java @@ -0,0 +1,198 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.cassandra.utils.btree; + +import java.util.Arrays; +import java.util.Comparator; + +import static org.apache.cassandra.utils.btree.BTree.*; + +/** + * A class for searching within one node of a btree: a linear chain (stack) of these is built of tree height + * to form a Cursor. Some corollaries of the basic building block operations in TreeCursor (moveOne and seekTo), + * along with some other methods for helping implement movement between two NodeCursor + * + * The behaviour is not dissimilar to that of NodeBuilder and TreeBuilder, wherein functions that may move + * us to a different node pass us the node we should move to, from which we continue our operations. + * @param <K> + */ +class NodeCursor<K> +{ + // TODO: consider splitting forwards from backwards + final NodeCursor<K> parent, child; + final Comparator<? super K> comparator; + + boolean inChild; + // if !inChild, this is the key position we are currently on; + // if inChild, this is the child position we are currently descending into + int position; + Object[] node; + int nodeOffset; + + NodeCursor(Object[] node, NodeCursor<K> parent, Comparator<? super K> comparator) + { + this.node = node; + this.parent = parent; + this.comparator = comparator; + // a well formed b-tree (text book, or ours) must be balanced, so by building a stack following the left-most branch + // we have a stack capable of visiting any path in the tree + this.child = BTree.isLeaf(node) ? null : new NodeCursor<>((Object[]) node[getChildStart(node)], this, comparator); + } + + void resetNode(Object[] node, int nodeOffset) + { + this.node = node; + this.nodeOffset = nodeOffset; + } + + /** + * adapt child position to key position within branch, knowing it is safe to do so + */ + void safeAdvanceIntoBranchFromChild(boolean forwards) + { + if (!forwards) + --position; + } + + /** + * adapt child position to key position within branch, and return if this was successful or we're now out of bounds + */ + boolean advanceIntoBranchFromChild(boolean forwards) + { + return forwards ? position < getBranchKeyEnd(node) : --position >= 0; + } + + boolean advanceLeafNode(boolean forwards) + { + return forwards ? ++position < getLeafKeyEnd(node) + : --position >= 0; + } + + /** + * @return the upper/lower bound of the child we are currently descended in + */ + K bound(boolean upper) + { + return (K) node[position - (upper ? 0 : 1)]; + } + + /** + * The parent that covers a range wider than ourselves, either ascending or descending, + * i.e. that defines the upper or lower bound on the subtree rooted at our node + * @param upper + * @return the NodeCursor parent that can tell us the upper/lower bound of ourselves + */ + NodeCursor<K> boundIterator(boolean upper) + { + NodeCursor<K> bound = this.parent; + while (bound != null && (upper ? bound.position >= getChildCount(bound.node) - 1 + : bound.position <= 0)) + bound = bound.parent; + return bound; + } + + /** + * look for the provided key in this node, in the specified direction: + * forwards => ceil search; otherwise floor + * + * we require that the node's "current" key (including the relevant bound if we are a parent we have ascended into) + * be already excluded by the search. this is useful for the following reasons: + * 1: we must ensure we never go backwards, so excluding that key from our binary search prevents our + * descending into a child we have already visited (without any further checks) + * 2: we already check the bounds as we search upwards for our natural parent; + * 3: we want to cheaply check sequential access, so we always check the first key we're on anyway (if it can be done easily) + */ + boolean seekInNode(K key, boolean forwards) + { + int position = this.position; + int lb, ub; + if (forwards) + { + lb = position + 1; + ub = getKeyEnd(node); + } + else + { + ub = position; + lb = 0; + } + + int find = Arrays.binarySearch((K[]) node, lb, ub, key, comparator); + if (find >= 0) + { + // exact key match, so we're in the correct node already. return success + this.position = find; + inChild = false; + return true; + } + + // if we are a branch, and we are an inequality match, the direction of travel doesn't matter + // so we only need to modify if we are going backwards on a leaf node, to produce floor semantics + int delta = isLeaf() & !forwards ? -1 : 0; + this.position = delta -1 -find; + return false; + } + + NodeCursor<K> descendToFirstChild(boolean forwards) + { + if (isLeaf()) + { + position = forwards ? 0 : getLeafKeyEnd(node) - 1; + return null; + } + inChild = true; + position = forwards ? 0 : getChildCount(node) - 1; + return descend(); + } + + // descend into the child at "position" + NodeCursor<K> descend() + { + Object[] childNode = (Object[]) node[position + getChildStart(node)]; + int childOffset = nodeOffset + treeIndexOffsetOfChild(node, position); + child.resetNode(childNode, childOffset); + inChild = true; + return child; + } + + boolean isLeaf() + { + return child == null; + } + + int globalIndex() + { + return nodeOffset + treeIndexOfKey(node, position); + } + + int globalLeafIndex() + { + return nodeOffset + treeIndexOfLeafKey(position); + } + + int globalBranchIndex() + { + return nodeOffset + treeIndexOfBranchKey(node, position); + } + + K value() + { + return (K) node[position]; + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/cassandra/blob/5250d7ff/src/java/org/apache/cassandra/utils/btree/Path.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/utils/btree/Path.java b/src/java/org/apache/cassandra/utils/btree/Path.java deleted file mode 100644 index 9b6789c..0000000 --- a/src/java/org/apache/cassandra/utils/btree/Path.java +++ /dev/null @@ -1,354 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.cassandra.utils.btree; - -import java.util.Comparator; - -import static org.apache.cassandra.utils.btree.BTree.NEGATIVE_INFINITY; -import static org.apache.cassandra.utils.btree.BTree.POSITIVE_INFINITY; -import static org.apache.cassandra.utils.btree.BTree.getBranchKeyEnd; -import static org.apache.cassandra.utils.btree.BTree.getKeyEnd; -import static org.apache.cassandra.utils.btree.BTree.getLeafKeyEnd; -import static org.apache.cassandra.utils.btree.BTree.isLeaf; - -/** - * An internal class for searching and iterating through a tree. As it traverses the tree, - * it adds the nodes visited to a stack. This allows us to backtrack from a child node - * to its parent. - * - * As we navigate the tree, we destructively modify this stack. - * - * Path is only intended to be used via Cursor. - */ -public class Path<V> -{ - // operations corresponding to the ones in NavigableSet - static enum Op - { - CEIL, // the least element greater than or equal to the given element - FLOOR, // the greatest element less than or equal to the given element - HIGHER, // the least element strictly greater than the given element - LOWER // the greatest element strictly less than the given element - } - - // the path to the searched-for key - Object[][] path; - // the index within the node of our path at a given depth - byte[] indexes; - // current depth. nothing in path[i] for i > depth is valid. - byte depth; - - Path() { } - Path(int depth, Object[] btree) - { - this.path = new Object[depth][]; - this.indexes = new byte[depth]; - this.path[0] = btree; - } - - void init(Object[] btree) - { - int depth = BTree.depth(btree); - if (path == null || path.length < depth) - { - path = new Object[depth][]; - indexes = new byte[depth]; - } - path[0] = btree; - } - - void moveEnd(Object[] node, boolean forwards) - { - push(node, getKeyEnd(node)); - if (!forwards) - predecessor(); - } - - void moveStart(Object[] node, boolean forwards) - { - push(node, -1); - if (forwards) - successor(); - } - - /** - * Find the provided key in the tree rooted at node, and store the root to it in the path - * - * @param comparator the comparator defining the order on the tree - * @param target the key to search for - * @param mode the type of search to perform - * @param forwards if the path should be setup for forward or backward iteration - * @param <K> - */ - <K> boolean find(Comparator<K> comparator, Object target, Op mode, boolean forwards) - { - // TODO : should not require parameter 'forwards' - consider modifying index to represent both - // child and key position, as opposed to just key position (which necessitates a different value depending - // on which direction you're moving in. Prerequisite for making Path public and using to implement general - // search - - Object[] node = path[depth]; - int lb = forwards ? indexes[depth] : 0; - pop(); - - if (target instanceof BTree.Special) - { - if (target == POSITIVE_INFINITY) - moveEnd(node, forwards); - else if (target == NEGATIVE_INFINITY) - moveStart(node, forwards); - else - throw new AssertionError(); - return false; - } - - while (true) - { - int keyEnd = getKeyEnd(node); - - // search for the target in the current node - int i = BTree.find(comparator, target, node, lb, keyEnd); - lb = 0; - if (i >= 0) - { - // exact match. transform exclusive bounds into the correct index by moving back or forwards one - push(node, i); - switch (mode) - { - case HIGHER: - successor(); - break; - case LOWER: - predecessor(); - } - return true; - } - i = -i - 1; - - // traverse into the appropriate child - if (!isLeaf(node)) - { - push(node, forwards ? i - 1 : i); - node = (Object[]) node[keyEnd + i]; - continue; - } - - // bottom of the tree and still not found. pick the right index to satisfy Op - switch (mode) - { - case FLOOR: - case LOWER: - i--; - } - - if (i < 0) - { - push(node, 0); - predecessor(); - } - else if (i >= keyEnd) - { - push(node, keyEnd - 1); - successor(); - } - else - { - push(node, i); - } - - return false; - } - } - - boolean isRoot() - { - return depth == 0; - } - - void pop() - { - depth--; - } - - Object[] currentNode() - { - return path[depth]; - } - - byte currentIndex() - { - return indexes[depth]; - } - - void push(Object[] node, int index) - { - path[++depth] = node; - indexes[depth] = (byte) index; - } - - void setIndex(int index) - { - indexes[depth] = (byte) index; - } - - byte findSuccessorParentDepth() - { - byte depth = this.depth; - depth--; - while (depth >= 0) - { - int ub = indexes[depth] + 1; - Object[] node = path[depth]; - if (ub < getBranchKeyEnd(node)) - return depth; - depth--; - } - return -1; - } - - byte findPredecessorParentDepth() - { - byte depth = this.depth; - depth--; - while (depth >= 0) - { - int ub = indexes[depth] - 1; - if (ub >= 0) - return depth; - depth--; - } - return -1; - } - - // move to the next key in the tree - void successor() - { - Object[] node = currentNode(); - int i = currentIndex(); - - if (!isLeaf(node)) - { - // if we're on a key in a branch, we MUST have a descendant either side of us, - // so we always go down the left-most child until we hit a leaf - node = (Object[]) node[getBranchKeyEnd(node) + i + 1]; - while (!isLeaf(node)) - { - push(node, -1); - node = (Object[]) node[getBranchKeyEnd(node)]; - } - push(node, 0); - return; - } - - // if we haven't reached the end of this leaf, just increment our index and return - i += 1; - if (i < getLeafKeyEnd(node)) - { - // moved to the next key in the same leaf - setIndex(i); - return; - } - - // we've reached the end of this leaf, - // so go up until we reach something we've not finished visiting - while (!isRoot()) - { - pop(); - i = currentIndex() + 1; - node = currentNode(); - if (i < getKeyEnd(node)) - { - setIndex(i); - return; - } - } - - // we've visited the last key in the root node, so we're done - setIndex(getKeyEnd(node)); - } - - // move to the previous key in the tree - void predecessor() - { - Object[] node = currentNode(); - int i = currentIndex(); - - if (!isLeaf(node)) - { - // if we're on a key in a branch, we MUST have a descendant either side of us - // so we always go down the right-most child until we hit a leaf - node = (Object[]) node[getBranchKeyEnd(node) + i]; - while (!isLeaf(node)) - { - i = getBranchKeyEnd(node); - push(node, i); - node = (Object[]) node[i * 2]; - } - push(node, getLeafKeyEnd(node) - 1); - return; - } - - // if we haven't reached the beginning of this leaf, just decrement our index and return - i -= 1; - if (i >= 0) - { - setIndex(i); - return; - } - - // we've reached the beginning of this leaf, - // so go up until we reach something we've not finished visiting - while (!isRoot()) - { - pop(); - i = currentIndex() - 1; - if (i >= 0) - { - setIndex(i); - return; - } - } - - // we've visited the last key in the root node, so we're done - setIndex(-1); - } - - Object currentKey() - { - return currentNode()[currentIndex()]; - } - - int compareTo(Path<V> that, boolean forwards) - { - int d = Math.min(this.depth, that.depth); - for (int i = 0; i <= d; i++) - { - int c = this.indexes[i] - that.indexes[i]; - if (c != 0) - return c; - } - // identical indices up to depth, so if somebody is lower depth they are on a later item if iterating forwards - // and an earlier item if iterating backwards, as the node at max common depth must be a branch if they are - // different depths, and branches that are currently descended into lag the child index they are in when iterating forwards, - // i.e. if they are in child 0 they record an index of -1 forwards, or 0 when backwards - d = this.depth - that.depth; - return forwards ? d : -d; - } -} - http://git-wip-us.apache.org/repos/asf/cassandra/blob/5250d7ff/src/java/org/apache/cassandra/utils/btree/TreeBuilder.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/utils/btree/TreeBuilder.java b/src/java/org/apache/cassandra/utils/btree/TreeBuilder.java new file mode 100644 index 0000000..cb3ddc5 --- /dev/null +++ b/src/java/org/apache/cassandra/utils/btree/TreeBuilder.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.cassandra.utils.btree; + +import java.util.Comparator; + +import static org.apache.cassandra.utils.btree.BTree.EMPTY_LEAF; +import static org.apache.cassandra.utils.btree.BTree.FAN_SHIFT; +import static org.apache.cassandra.utils.btree.BTree.POSITIVE_INFINITY; + +/** + * A class for constructing a new BTree, either from an existing one and some set of modifications + * or a new tree from a sorted collection of items. + * <p/> + * This is a fairly heavy-weight object, so a ThreadLocal instance is created for making modifications to a tree + */ +final class TreeBuilder +{ + private final NodeBuilder rootBuilder = new NodeBuilder(); + + /** + * At the highest level, we adhere to the classic b-tree insertion algorithm: + * + * 1. Add to the appropriate leaf + * 2. Split the leaf if necessary, add the median to the parent + * 3. Split the parent if necessary, etc. + * + * There is one important difference: we don't actually modify the original tree, but copy each node that we + * modify. Note that every node on the path to the key being inserted or updated will be modified; this + * implies that at a minimum, the root node will be modified for every update, so every root is a "snapshot" + * of a tree that can be iterated or sliced without fear of concurrent modifications. + * + * The NodeBuilder class handles the details of buffering the copied contents of the original tree and + * adding in our changes. Since NodeBuilder maintains parent/child references, it also handles parent-splitting + * (easy enough, since any node affected by the split will already be copied into a NodeBuilder). + * + * One other difference from the simple algorithm is that we perform modifications in bulk; + * we assume @param source has been sorted, e.g. by BTree.update, so the update of each key resumes where + * the previous left off. + */ + public <C, K extends C, V extends C> Object[] update(Object[] btree, Comparator<C> comparator, Iterable<K> source, UpdateFunction<K, V> updateF) + { + assert updateF != null; + + NodeBuilder current = rootBuilder; + current.reset(btree, POSITIVE_INFINITY, updateF, comparator); + + for (K key : source) + { + while (true) + { + if (updateF.abortEarly()) + { + rootBuilder.clear(); + return null; + } + NodeBuilder next = current.update(key); + if (next == null) + break; + // we were in a subtree from a previous key that didn't contain this new key; + // retry against the correct subtree + current = next; + } + } + + // finish copying any remaining keys from the original btree + while (true) + { + NodeBuilder next = current.finish(); + if (next == null) + break; + current = next; + } + + // updating with POSITIVE_INFINITY means that current should be back to the root + assert current.isRoot(); + + Object[] r = current.toNode(); + current.clear(); + return r; + } + + public <C, K extends C, V extends C> Object[] build(Iterable<K> source, UpdateFunction<K, V> updateF, int size) + { + assert updateF != null; + + NodeBuilder current = rootBuilder; + // we descend only to avoid wasting memory; in update() we will often descend into existing trees + // so here we want to descend also, so we don't have lg max(N) depth in both directions + while ((size >>= FAN_SHIFT) > 0) + current = current.ensureChild(); + + current.reset(EMPTY_LEAF, POSITIVE_INFINITY, updateF, null); + for (K key : source) + current.addNewKey(updateF.apply(key)); + + current = current.ascendToRoot(); + + Object[] r = current.toNode(); + current.clear(); + return r; + } +} http://git-wip-us.apache.org/repos/asf/cassandra/blob/5250d7ff/src/java/org/apache/cassandra/utils/btree/TreeCursor.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/utils/btree/TreeCursor.java b/src/java/org/apache/cassandra/utils/btree/TreeCursor.java new file mode 100644 index 0000000..9e946f9 --- /dev/null +++ b/src/java/org/apache/cassandra/utils/btree/TreeCursor.java @@ -0,0 +1,272 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.cassandra.utils.btree; + +import java.util.Arrays; +import java.util.Comparator; + +import static org.apache.cassandra.utils.btree.BTree.*; + +/** + * Supports two basic operations for moving around a BTree, either forwards or backwards: + * moveOne(), and seekTo() + * + * These two methods, along with movement to the start/end, permit us to construct any desired + * movement around a btree, without much cognitive burden. + * + * This TreeCursor is (and has its methods) package private. This is to avoid polluting the BTreeSearchIterator + * that extends it, and uses its functionality. If this class is useful for wider consumption, a public extension + * class can be provided, that just makes all of its methods public. + */ +class TreeCursor<K> extends NodeCursor<K> +{ + // TODO: spend some time optimising compiler inlining decisions: many of these methods have only one primary call-site + + NodeCursor<K> cur; + + TreeCursor(Comparator<? super K> comparator, Object[] node) + { + super(node, null, comparator); + } + + /** + * Move the cursor to either the first or last item in the btree + * @param start true if should move to the first item; false moves to the last + */ + void reset(boolean start) + { + cur = root(); + root().inChild = false; + // this is a corrupt position, but we ensure we never use it except to start our search from + root().position = start ? -1 : getKeyEnd(root().node); + } + + /** + * move the Cursor one item, either forwards or backwards + * @param forwards direction of travel + * @return false iff the cursor is exhausted in the direction of travel + */ + int moveOne(boolean forwards) + { + NodeCursor<K> cur = this.cur; + if (cur.isLeaf()) + { + // if we're a leaf, we try to step forwards inside ourselves + if (cur.advanceLeafNode(forwards)) + return cur.globalLeafIndex(); + + // if we fail, we just find our bounding parent + this.cur = cur = moveOutOfLeaf(forwards, cur, root()); + return cur.globalIndex(); + } + + // otherwise we descend directly into our next child + if (forwards) + ++cur.position; + cur = cur.descend(); + + // and go to its first item + NodeCursor<K> next; + while ( null != (next = cur.descendToFirstChild(forwards)) ) + cur = next; + + this.cur = cur; + return cur.globalLeafIndex(); + } + + /** + * seeks from the current position, forwards or backwards, for the provided key + * while the direction could be inferred (or ignored), it is required so that (e.g.) we do not infinitely loop on bad inputs + * if there is no such key, it moves to the key that would naturally proceed it (i.e. it behaves as ceil when ascending; floor when descending) + */ + int seekTo(K key, boolean forwards, boolean skipOne) + { + NodeCursor<K> cur = this.cur; + + /** + * decide if we will "try one" value by itself, as a sequential access; + * we actually *require* that we try the "current key" for any node before we call seekInNode on it. + * + * if we are already on a value, we just check it irregardless of if it is a leaf or not; + * if we are not, we have already excluded it (as we have consumed it), so: + * if we are on a branch we consider that good enough; + * otherwise, we move onwards one, and we try the new value + * + */ + boolean tryOne = !skipOne; + if ((!tryOne & cur.isLeaf()) && !(tryOne = (cur.advanceLeafNode(forwards) || (cur = moveOutOfLeaf(forwards, cur, null)) != null))) + { + // we moved out of the tree; return out-of-bounds + this.cur = root(); + return forwards ? -1 - size(rootNode()) : -1; + } + + if (tryOne) + { + // we're presently on a value we can (and *must*) cheaply test + K test = cur.value(); + + int cmp; + if (key == test) cmp = 0; // check object identity first, since we utilise that in some places and it's very cheap + else cmp = comparator.compare(key, test); + if (forwards ? cmp <= 0 : cmp >= 0) + { + // we've either matched, or excluded the value from being present + int index = cur.globalIndex(); + this.cur = cur; + return cmp == 0 ? index : -1 -index; + } + } + + // if we failed to match with the cheap test, first look to see if we're even in the correct sub-tree + while (cur != root()) + { + NodeCursor<K> bound = cur.boundIterator(forwards); + if (bound == null) + break; // we're all that's left + + int cmpbound = comparator.compare(key, bound.bound(forwards)); + if (forwards ? cmpbound < 0 : cmpbound > 0) + break; // already in correct sub-tree + + // bound is on-or-before target, so ascend to that bound and continue looking upwards + cur = bound; + cur.safeAdvanceIntoBranchFromChild(forwards); + if (cmpbound == 0) // it was an exact match, so terminate here + { + this.cur = cur; + return cur.globalBranchIndex(); + } + } + + // we must now be able to find our target in the sub-tree rooted at cur + boolean match; + while (!(match = cur.seekInNode(key, forwards)) && !cur.isLeaf()) + { + cur = cur.descend(); + cur.position = forwards ? -1 : getKeyEnd(cur.node); + } + + if (!match) + cur = ensureValidLocation(forwards, cur); + + this.cur = cur; + assert !cur.inChild; + int index = cur.globalIndex(); + return match ? index : -1 -index; + } + + /** + * ensures a leaf node we have seeked in, is not positioned outside of its bounds, + * by moving us into its parents (if any); if it is the root, we're permitted to be out-of-bounds + * as this indicates exhaustion + */ + private NodeCursor<K> ensureValidLocation(boolean forwards, NodeCursor<K> cur) + { + assert cur.isLeaf(); + int position = cur.position; + // if we're out of bounds of the leaf, move once in direction of travel + if ((position < 0) | (position >= getLeafKeyEnd(cur.node))) + cur = moveOutOfLeaf(forwards, cur, root()); + return cur; + } + + /** + * move out of a leaf node that is currently out of (its own) bounds + * @return null if we're now out-of-bounds of the whole true + */ + private <K> NodeCursor<K> moveOutOfLeaf(boolean forwards, NodeCursor<K> cur, NodeCursor<K> ifFail) + { + while (true) + { + cur = cur.parent; + if (cur == null) + { + root().inChild = false; + return ifFail; + } + if (cur.advanceIntoBranchFromChild(forwards)) + break; + } + cur.inChild = false; + return cur; + } + + /** + * resets the cursor and seeks to the specified position; does not assume locality or take advantage of the cursor's current position + */ + void seekTo(int index) + { + if ((index < 0) | (index >= BTree.size(rootNode()))) + { + if ((index < -1) | (index > BTree.size(rootNode()))) + throw new IndexOutOfBoundsException(index + " not in range [0.." + BTree.size(rootNode()) + ")"); + reset(index == -1); + return; + } + + NodeCursor<K> cur = this.cur; + cur = root(); + assert cur.nodeOffset == 0; + while (true) + { + int relativeIndex = index - cur.nodeOffset; // index within subtree rooted at cur + Object[] node = cur.node; + + if (cur.isLeaf()) + { + assert relativeIndex < getLeafKeyEnd(node); + cur.position = relativeIndex; + this.cur = cur; + return; + } + + int[] sizeMap = getSizeMap(node); + int boundary = Arrays.binarySearch(sizeMap, relativeIndex); + if (boundary >= 0) + { + // exact match, in this branch node + assert boundary < sizeMap.length - 1; + cur.position = boundary; + cur.inChild = false; + this.cur = cur; + return; + } + + cur.inChild = true; + cur.position = -1 -boundary; + cur = cur.descend(); + } + } + + private NodeCursor<K> root() + { + return this; + } + + Object[] rootNode() + { + return this.node; + } + + K currentValue() + { + return cur.value(); + } +} http://git-wip-us.apache.org/repos/asf/cassandra/blob/5250d7ff/src/java/org/apache/cassandra/utils/btree/UpdateFunction.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/utils/btree/UpdateFunction.java b/src/java/org/apache/cassandra/utils/btree/UpdateFunction.java index 93c02ae..9a7fa1b 100644 --- a/src/java/org/apache/cassandra/utils/btree/UpdateFunction.java +++ b/src/java/org/apache/cassandra/utils/btree/UpdateFunction.java @@ -42,4 +42,30 @@ public interface UpdateFunction<K, V> extends Function<K, V> */ void allocated(long heapSize); + static final UpdateFunction<Object, Object> noOp = new UpdateFunction<Object, Object>() + { + public Object apply(Object replacing, Object updating) + { + return updating; + } + + public boolean abortEarly() + { + return false; + } + + public void allocated(long heapSize) + { + } + + public Object apply(Object k) + { + return k; + } + }; + + public static <K> UpdateFunction<K, K> noOp() + { + return (UpdateFunction<K, K>) noOp; + } }