[08/14] cassandra git commit: Integrate SASI index into Cassandra

xedin Sat, 23 Jan 2016 19:36:55 -0800

http://git-wip-us.apache.org/repos/asf/cassandra/blob/72790dc8/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java
----------------------------------------------------------------------
diff --git 
a/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java 
b/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java
new file mode 100644
index 0000000..0d2214a
--- /dev/null
+++ 
b/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java
@@ -0,0 +1,281 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cassandra.index.sasi.utils;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.PriorityQueue;
+
+import com.google.common.collect.Iterators;
+import org.apache.cassandra.io.util.FileUtils;
+
+import com.google.common.annotations.VisibleForTesting;
+
+public class RangeIntersectionIterator
+{
+    protected enum Strategy
+    {
+        BOUNCE, LOOKUP, ADAPTIVE
+    }
+
+    public static <K extends Comparable<K>, D extends CombinedValue<K>> 
Builder<K, D> builder()
+    {
+        return builder(Strategy.ADAPTIVE);
+    }
+
+    @VisibleForTesting
+    protected static <K extends Comparable<K>, D extends CombinedValue<K>> 
Builder<K, D> builder(Strategy strategy)
+    {
+        return new Builder<>(strategy);
+    }
+
+    public static class Builder<K extends Comparable<K>, D extends 
CombinedValue<K>> extends RangeIterator.Builder<K, D>
+    {
+        private final Strategy strategy;
+
+        public Builder(Strategy strategy)
+        {
+            super(IteratorType.INTERSECTION);
+            this.strategy = strategy;
+        }
+
+        protected RangeIterator<K, D> buildIterator()
+        {
+            // if the range is disjoint we can simply return empty
+            // iterator of any type, because it's not going to produce any 
results.
+            if (statistics.isDisjoint())
+                return new BounceIntersectionIterator<>(statistics, new 
PriorityQueue<RangeIterator<K, D>>(1));
+
+            switch (strategy)
+            {
+                case LOOKUP:
+                    return new LookupIntersectionIterator<>(statistics, 
ranges);
+
+                case BOUNCE:
+                    return new BounceIntersectionIterator<>(statistics, 
ranges);
+
+                case ADAPTIVE:
+                    return statistics.sizeRatio() <= 0.01d
+                            ? new LookupIntersectionIterator<>(statistics, 
ranges)
+                            : new BounceIntersectionIterator<>(statistics, 
ranges);
+
+                default:
+                    throw new IllegalStateException("Unknown strategy: " + 
strategy);
+            }
+        }
+    }
+
+    private static abstract class AbstractIntersectionIterator<K extends 
Comparable<K>, D extends CombinedValue<K>> extends RangeIterator<K, D>
+    {
+        protected final PriorityQueue<RangeIterator<K, D>> ranges;
+
+        private AbstractIntersectionIterator(Builder.Statistics<K, D> 
statistics, PriorityQueue<RangeIterator<K, D>> ranges)
+        {
+            super(statistics);
+            this.ranges = ranges;
+        }
+
+        public void close() throws IOException
+        {
+            for (RangeIterator<K, D> range : ranges)
+                FileUtils.closeQuietly(range);
+        }
+    }
+
+    /**
+     * Iterator which performs intersection of multiple ranges by using 
bouncing (merge-join) technique to identify
+     * common elements in the given ranges. Aforementioned "bounce" works as 
follows: range queue is poll'ed for the
+     * range with the smallest current token (main loop), that token is used 
to {@link RangeIterator#skipTo(Comparable)}
+     * other ranges, if token produced by {@link 
RangeIterator#skipTo(Comparable)} is equal to current "candidate" token,
+     * both get merged together and the same operation is repeated for next 
range from the queue, if returned token
+     * is not equal than candidate, candidate's range gets put back into the 
queue and the main loop gets repeated until
+     * next intersection token is found or at least one iterator runs out of 
tokens.
+     *
+     * This technique is every efficient to jump over gaps in the ranges.
+     *
+     * @param <K> The type used to sort ranges.
+     * @param <D> The container type which is going to be returned by {@link 
Iterator#next()}.
+     */
+    @VisibleForTesting
+    protected static class BounceIntersectionIterator<K extends Comparable<K>, 
D extends CombinedValue<K>> extends AbstractIntersectionIterator<K, D>
+    {
+        private BounceIntersectionIterator(Builder.Statistics<K, D> 
statistics, PriorityQueue<RangeIterator<K, D>> ranges)
+        {
+            super(statistics, ranges);
+        }
+
+        protected D computeNext()
+        {
+            while (!ranges.isEmpty())
+            {
+                RangeIterator<K, D> head = ranges.poll();
+
+                // jump right to the beginning of the intersection or return 
next element
+                if (head.getCurrent().compareTo(getMinimum()) < 0)
+                    head.skipTo(getMinimum());
+
+                D candidate = head.hasNext() ? head.next() : null;
+                if (candidate == null || 
candidate.get().compareTo(getMaximum()) > 0)
+                {
+                    ranges.add(head);
+                    return endOfData();
+                }
+
+                List<RangeIterator<K, D>> processed = new ArrayList<>();
+
+                boolean intersectsAll = true, exhausted = false;
+                while (!ranges.isEmpty())
+                {
+                    RangeIterator<K, D> range = ranges.poll();
+
+                    // found a range which doesn't overlap with one (or 
possibly more) other range(s)
+                    if (!isOverlapping(head, range))
+                    {
+                        exhausted = true;
+                        intersectsAll = false;
+                        break;
+                    }
+
+                    D point = range.skipTo(candidate.get());
+
+                    if (point == null) // other range is exhausted
+                    {
+                        exhausted = true;
+                        intersectsAll = false;
+                        break;
+                    }
+
+                    processed.add(range);
+
+                    if (candidate.get().equals(point.get()))
+                    {
+                        candidate.merge(point);
+                        // advance skipped range to the next element if any
+                        Iterators.getNext(range, null);
+                    }
+                    else
+                    {
+                        intersectsAll = false;
+                        break;
+                    }
+                }
+
+                ranges.add(head);
+
+                for (RangeIterator<K, D> range : processed)
+                    ranges.add(range);
+
+                if (exhausted)
+                    return endOfData();
+
+                if (intersectsAll)
+                    return candidate;
+            }
+
+            return endOfData();
+        }
+
+        protected void performSkipTo(K nextToken)
+        {
+            List<RangeIterator<K, D>> skipped = new ArrayList<>();
+
+            while (!ranges.isEmpty())
+            {
+                RangeIterator<K, D> range = ranges.poll();
+                range.skipTo(nextToken);
+                skipped.add(range);
+            }
+
+            for (RangeIterator<K, D> range : skipped)
+                ranges.add(range);
+        }
+    }
+
+    /**
+     * Iterator which performs a linear scan over a primary range (the 
smallest of the ranges)
+     * and O(log(n)) lookup into secondary ranges using values from the 
primary iterator.
+     * This technique is efficient when one of the intersection ranges is 
smaller than others
+     * e.g. ratio 0.01d (default), in such situation scan + lookup is more 
efficient comparing
+     * to "bounce" merge because "bounce" distance is never going to be big.
+     *
+     * @param <K> The type used to sort ranges.
+     * @param <D> The container type which is going to be returned by {@link 
Iterator#next()}.
+     */
+    @VisibleForTesting
+    protected static class LookupIntersectionIterator<K extends Comparable<K>, 
D extends CombinedValue<K>> extends AbstractIntersectionIterator<K, D>
+    {
+        private final RangeIterator<K, D> smallestIterator;
+
+        private LookupIntersectionIterator(Builder.Statistics<K, D> 
statistics, PriorityQueue<RangeIterator<K, D>> ranges)
+        {
+            super(statistics, ranges);
+
+            smallestIterator = statistics.minRange;
+
+            if (smallestIterator.getCurrent().compareTo(getMinimum()) < 0)
+                smallestIterator.skipTo(getMinimum());
+        }
+
+        protected D computeNext()
+        {
+            while (smallestIterator.hasNext())
+            {
+                D candidate = smallestIterator.next();
+                K token = candidate.get();
+
+                boolean intersectsAll = true;
+                for (RangeIterator<K, D> range : ranges)
+                {
+                    // avoid checking against self, much cheaper than changing 
queue comparator
+                    // to compare based on the size and re-populating such 
queue.
+                    if (range.equals(smallestIterator))
+                        continue;
+
+                    // found a range which doesn't overlap with one (or 
possibly more) other range(s)
+                    if (!isOverlapping(smallestIterator, range))
+                        return endOfData();
+
+                    D point = range.skipTo(token);
+
+                    if (point == null) // one of the iterators is exhausted
+                        return endOfData();
+
+                    if (!point.get().equals(token))
+                    {
+                        intersectsAll = false;
+                        break;
+                    }
+
+                    candidate.merge(point);
+                }
+
+                if (intersectsAll)
+                    return candidate;
+            }
+
+            return endOfData();
+        }
+
+        protected void performSkipTo(K nextToken)
+        {
+            smallestIterator.skipTo(nextToken);
+        }
+    }
+}


http://git-wip-us.apache.org/repos/asf/cassandra/blob/72790dc8/src/java/org/apache/cassandra/index/sasi/utils/RangeIterator.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/utils/RangeIterator.java 
b/src/java/org/apache/cassandra/index/sasi/utils/RangeIterator.java
new file mode 100644
index 0000000..1b5aee4
--- /dev/null
+++ b/src/java/org/apache/cassandra/index/sasi/utils/RangeIterator.java
@@ -0,0 +1,279 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cassandra.index.sasi.utils;
+
+import java.io.Closeable;
+import java.util.Comparator;
+import java.util.List;
+import java.util.PriorityQueue;
+
+import com.google.common.annotations.VisibleForTesting;
+
+public abstract class RangeIterator<K extends Comparable<K>, T extends 
CombinedValue<K>> extends AbstractIterator<T> implements Closeable
+{
+    private final K min, max;
+    private final long count;
+    private K current;
+
+    protected RangeIterator(Builder.Statistics<K, T> statistics)
+    {
+        this(statistics.min, statistics.max, statistics.tokenCount);
+    }
+
+    public RangeIterator(RangeIterator<K, T> range)
+    {
+        this(range == null ? null : range.min, range == null ? null : 
range.max, range == null ? -1 : range.count);
+    }
+
+    public RangeIterator(K min, K max, long count)
+    {
+        this.min = min;
+        this.current = min;
+        this.max = max;
+        this.count = count;
+    }
+
+    public final K getMinimum()
+    {
+        return min;
+    }
+
+    public final K getCurrent()
+    {
+        return current;
+    }
+
+    public final K getMaximum()
+    {
+        return max;
+    }
+
+    public final long getCount()
+    {
+        return count;
+    }
+
+    /**
+     * When called, this iterators current position should
+     * be skipped forwards until finding either:
+     *   1) an element equal to or bigger than next
+     *   2) the end of the iterator
+     *
+     * @param nextToken value to skip the iterator forward until matching
+     *
+     * @return The next current token after the skip was performed
+     */
+    public final T skipTo(K nextToken)
+    {
+        if (min == null || max == null)
+            return endOfData();
+
+        if (current.compareTo(nextToken) >= 0)
+            return next == null ? recomputeNext() : next;
+
+        if (max.compareTo(nextToken) < 0)
+            return endOfData();
+
+        performSkipTo(nextToken);
+        return recomputeNext();
+    }
+
+    protected abstract void performSkipTo(K nextToken);
+
+    protected T recomputeNext()
+    {
+        return tryToComputeNext() ? peek() : endOfData();
+    }
+
+    protected boolean tryToComputeNext()
+    {
+        boolean hasNext = super.tryToComputeNext();
+        current = hasNext ? next.get() : getMaximum();
+        return hasNext;
+    }
+
+    public static abstract class Builder<K extends Comparable<K>, D extends 
CombinedValue<K>>
+    {
+        public enum IteratorType
+        {
+            UNION, INTERSECTION
+        }
+
+        @VisibleForTesting
+        protected final Statistics<K, D> statistics;
+
+        @VisibleForTesting
+        protected final PriorityQueue<RangeIterator<K, D>> ranges;
+
+        public Builder(IteratorType type)
+        {
+            statistics = new Statistics<>(type);
+            ranges = new PriorityQueue<>(16, (Comparator<RangeIterator<K, D>>) 
(a, b) -> a.getCurrent().compareTo(b.getCurrent()));
+        }
+
+        public K getMinimum()
+        {
+            return statistics.min;
+        }
+
+        public K getMaximum()
+        {
+            return statistics.max;
+        }
+
+        public long getTokenCount()
+        {
+            return statistics.tokenCount;
+        }
+
+        public int rangeCount()
+        {
+            return ranges.size();
+        }
+
+        public Builder<K, D> add(RangeIterator<K, D> range)
+        {
+            if (range == null || range.getMinimum() == null || 
range.getMaximum() == null)
+                return this;
+
+            ranges.add(range);
+            statistics.update(range);
+
+            return this;
+        }
+
+        public Builder<K, D> add(List<RangeIterator<K, D>> ranges)
+        {
+            if (ranges == null || ranges.isEmpty())
+                return this;
+
+            ranges.forEach(this::add);
+            return this;
+        }
+
+        public final RangeIterator<K, D> build()
+        {
+            switch (rangeCount())
+            {
+                case 0:
+                    return null;
+
+                case 1:
+                    return ranges.poll();
+
+                default:
+                    return buildIterator();
+            }
+        }
+
+        protected abstract RangeIterator<K, D> buildIterator();
+
+        public static class Statistics<K extends Comparable<K>, D extends 
CombinedValue<K>>
+        {
+            protected final IteratorType iteratorType;
+
+            protected K min, max;
+            protected long tokenCount;
+
+            // iterator with the least number of items
+            protected RangeIterator<K, D> minRange;
+            // iterator with the most number of items
+            protected RangeIterator<K, D> maxRange;
+
+            // tracks if all of the added ranges overlap, which is useful in 
case of intersection,
+            // as it gives direct answer as to such iterator is going to 
produce any results.
+            protected boolean isOverlapping = true;
+
+            public Statistics(IteratorType iteratorType)
+            {
+                this.iteratorType = iteratorType;
+            }
+
+            /**
+             * Update statistics information with the given range.
+             *
+             * Updates min/max of the combined range, token count and
+             * tracks range with the least/most number of tokens.
+             *
+             * @param range The range to update statistics with.
+             */
+            public void update(RangeIterator<K, D> range)
+            {
+                switch (iteratorType)
+                {
+                    case UNION:
+                        min = min == null || min.compareTo(range.getMinimum()) 
> 0 ? range.getMinimum() : min;
+                        max = max == null || max.compareTo(range.getMaximum()) 
< 0 ? range.getMaximum() : max;
+                        break;
+
+                    case INTERSECTION:
+                        // minimum of the intersection is the biggest minimum 
of individual iterators
+                        min = min == null || min.compareTo(range.getMinimum()) 
< 0 ? range.getMinimum() : min;
+                        // maximum of the intersection is the smallest maximum 
of individual iterators
+                        max = max == null || max.compareTo(range.getMaximum()) 
> 0 ? range.getMaximum() : max;
+                        break;
+
+                    default:
+                        throw new IllegalStateException("Unknown iterator 
type: " + iteratorType);
+                }
+
+                // check if new range is disjoint with already added ranges, 
which means that this intersection
+                // is not going to produce any results, so we can cleanup 
range storage and never added anything to it.
+                isOverlapping &= isOverlapping(min, max, range);
+
+                minRange = minRange == null ? range : min(minRange, range);
+                maxRange = maxRange == null ? range : max(maxRange, range);
+
+                tokenCount += range.getCount();
+
+            }
+
+            private RangeIterator<K, D> min(RangeIterator<K, D> a, 
RangeIterator<K, D> b)
+            {
+                return a.getCount() > b.getCount() ? b : a;
+            }
+
+            private RangeIterator<K, D> max(RangeIterator<K, D> a, 
RangeIterator<K, D> b)
+            {
+                return a.getCount() > b.getCount() ? a : b;
+            }
+
+            public boolean isDisjoint()
+            {
+                return !isOverlapping;
+            }
+
+            public double sizeRatio()
+            {
+                return minRange.getCount() * 1d / maxRange.getCount();
+            }
+        }
+    }
+
+    @VisibleForTesting
+    protected static <K extends Comparable<K>, D extends CombinedValue<K>> 
boolean isOverlapping(RangeIterator<K, D> a, RangeIterator<K, D> b)
+    {
+        return isOverlapping(a.getCurrent(), a.getMaximum(), b);
+    }
+
+    @VisibleForTesting
+    protected static <K extends Comparable<K>, D extends CombinedValue<K>> 
boolean isOverlapping(K min, K max, RangeIterator<K, D> b)
+    {
+        return min.compareTo(b.getMaximum()) <= 0 && 
b.getCurrent().compareTo(max) <= 0;
+    }
+}

http://git-wip-us.apache.org/repos/asf/cassandra/blob/72790dc8/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java
----------------------------------------------------------------------
diff --git 
a/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java 
b/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java
new file mode 100644
index 0000000..310b6d0
--- /dev/null
+++ b/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java
@@ -0,0 +1,158 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cassandra.index.sasi.utils;
+
+import java.io.IOException;
+import java.util.*;
+import java.util.stream.Collectors;
+
+import org.apache.cassandra.io.util.FileUtils;
+
+/**
+ * Range Union Iterator is used to return sorted stream of elements from 
multiple RangeIterator instances.
+ *
+ * PriorityQueue is used as a sorting mechanism for the ranges, where each 
computeNext() operation would poll
+ * from the queue (and push when done), which returns range that contains the 
smallest element, because
+ * sorting is done on the moving window of range iteration {@link 
RangeIterator#getCurrent()}. Once retrieved
+ * the smallest element (return candidate) is attempted to be merged with 
other ranges, because there could
+ * be equal elements in adjacent ranges, such ranges are poll'ed only if their 
{@link RangeIterator#getCurrent()}
+ * equals to the return candidate.
+ *
+ * @param <K> The type used to sort ranges.
+ * @param <D> The container type which is going to be returned by {@link 
Iterator#next()}.
+ */
+public class RangeUnionIterator<K extends Comparable<K>, D extends 
CombinedValue<K>> extends RangeIterator<K, D>
+{
+    private final PriorityQueue<RangeIterator<K, D>> ranges;
+
+    private RangeUnionIterator(Builder.Statistics<K, D> statistics, 
PriorityQueue<RangeIterator<K, D>> ranges)
+    {
+        super(statistics);
+        this.ranges = ranges;
+    }
+
+    public D computeNext()
+    {
+        RangeIterator<K, D> head = null;
+
+        while (!ranges.isEmpty())
+        {
+            head = ranges.poll();
+            if (head.hasNext())
+                break;
+
+            FileUtils.closeQuietly(head);
+        }
+
+        if (head == null || !head.hasNext())
+            return endOfData();
+
+        D candidate = head.next();
+
+        List<RangeIterator<K, D>> processedRanges = new ArrayList<>();
+
+        if (head.hasNext())
+            processedRanges.add(head);
+        else
+            FileUtils.closeQuietly(head);
+
+        while (!ranges.isEmpty())
+        {
+            // peek here instead of poll is an optimization
+            // so we can re-insert less ranges back if candidate
+            // is less than head of the current range.
+            RangeIterator<K, D> range = ranges.peek();
+
+            int cmp = candidate.get().compareTo(range.getCurrent());
+
+            assert cmp <= 0;
+
+            if (cmp < 0)
+            {
+                break; // candidate is smaller than next token, return 
immediately
+            }
+            else if (cmp == 0)
+            {
+                candidate.merge(range.next()); // consume and merge
+
+                range = ranges.poll();
+                // re-prioritize changed range
+
+                if (range.hasNext())
+                    processedRanges.add(range);
+                else
+                    FileUtils.closeQuietly(range);
+            }
+        }
+
+        ranges.addAll(processedRanges);
+        return candidate;
+    }
+
+    protected void performSkipTo(K nextToken)
+    {
+        List<RangeIterator<K, D>> changedRanges = new ArrayList<>();
+
+        while (!ranges.isEmpty())
+        {
+            if (ranges.peek().getCurrent().compareTo(nextToken) >= 0)
+                break;
+
+            RangeIterator<K, D> head = ranges.poll();
+
+            if (head.getMaximum().compareTo(nextToken) >= 0)
+            {
+                head.skipTo(nextToken);
+                changedRanges.add(head);
+                continue;
+            }
+
+            FileUtils.closeQuietly(head);
+        }
+
+        ranges.addAll(changedRanges.stream().collect(Collectors.toList()));
+    }
+
+    public void close() throws IOException
+    {
+        ranges.forEach(FileUtils::closeQuietly);
+    }
+
+    public static <K extends Comparable<K>, D extends CombinedValue<K>> 
Builder<K, D> builder()
+    {
+        return new Builder<>();
+    }
+
+    public static <K extends Comparable<K>, D extends CombinedValue<K>> 
RangeIterator<K, D> build(List<RangeIterator<K, D>> tokens)
+    {
+        return new Builder<K, D>().add(tokens).build();
+    }
+
+    public static class Builder<K extends Comparable<K>, D extends 
CombinedValue<K>> extends RangeIterator.Builder<K, D>
+    {
+        public Builder()
+        {
+            super(IteratorType.UNION);
+        }
+
+        protected RangeIterator<K, D> buildIterator()
+        {
+            return new RangeUnionIterator<>(statistics, ranges);
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/cassandra/blob/72790dc8/src/java/org/apache/cassandra/index/sasi/utils/TypeUtil.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/utils/TypeUtil.java 
b/src/java/org/apache/cassandra/index/sasi/utils/TypeUtil.java
new file mode 100644
index 0000000..8b38530
--- /dev/null
+++ b/src/java/org/apache/cassandra/index/sasi/utils/TypeUtil.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cassandra.index.sasi.utils;
+
+import java.nio.ByteBuffer;
+
+import org.apache.cassandra.db.marshal.*;
+import org.apache.cassandra.serializers.MarshalException;
+
+public class TypeUtil
+{
+    public static boolean isValid(ByteBuffer term, AbstractType<?> validator)
+    {
+        try
+        {
+            validator.validate(term);
+            return true;
+        }
+        catch (MarshalException e)
+        {
+            return false;
+        }
+    }
+
+    public static ByteBuffer tryUpcast(ByteBuffer term, AbstractType<?> 
validator)
+    {
+        if (term.remaining() == 0)
+            return null;
+
+        try
+        {
+            if (validator instanceof Int32Type && term.remaining() == 2)
+            {
+                return Int32Type.instance.decompose((int) 
term.getShort(term.position()));
+            }
+            else if (validator instanceof LongType)
+            {
+                long upcastToken;
+
+                switch (term.remaining())
+                {
+                    case 2:
+                        upcastToken = (long) term.getShort(term.position());
+                        break;
+
+                    case 4:
+                        upcastToken = (long) Int32Type.instance.compose(term);
+                        break;
+
+                    default:
+                        upcastToken = 
Long.valueOf(UTF8Type.instance.getString(term));
+                }
+
+                return LongType.instance.decompose(upcastToken);
+            }
+            else if (validator instanceof DoubleType && term.remaining() == 4)
+            {
+                return DoubleType.instance.decompose((double) 
FloatType.instance.compose(term));
+            }
+
+            // maybe it was a string after all
+            return validator.fromString(UTF8Type.instance.getString(term));
+        }
+        catch (Exception e)
+        {
+            return null;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/cassandra/blob/72790dc8/src/java/org/apache/cassandra/index/sasi/utils/trie/AbstractPatriciaTrie.java
----------------------------------------------------------------------
diff --git 
a/src/java/org/apache/cassandra/index/sasi/utils/trie/AbstractPatriciaTrie.java 
b/src/java/org/apache/cassandra/index/sasi/utils/trie/AbstractPatriciaTrie.java
new file mode 100644
index 0000000..b359416
--- /dev/null
+++ 
b/src/java/org/apache/cassandra/index/sasi/utils/trie/AbstractPatriciaTrie.java
@@ -0,0 +1,1151 @@
+/*
+ * Copyright 2005-2010 Roger Kapsi, Sam Berlin
+ *
+ *   Licensed under the Apache License, Version 2.0 (the "License");
+ *   you may not use this file except in compliance with the License.
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software
+ *   distributed under the License is distributed on an "AS IS" BASIS,
+ *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *   See the License for the specific language governing permissions and
+ *   limitations under the License.
+ */
+
+/**
+ * This class is taken from https://github.com/rkapsi/patricia-trie (v0.6), 
and slightly modified
+ * to correspond to Cassandra code style, as the only Patricia Trie 
implementation,
+ * which supports pluggable key comparators (e.g. commons-collections 
PatriciaTrie (which is based
+ * on rkapsi/patricia-trie project) only supports String keys)
+ * but unfortunately is not deployed to the maven central as a downloadable 
artifact.
+ */
+
+package org.apache.cassandra.index.sasi.utils.trie;
+
+import java.util.AbstractCollection;
+import java.util.AbstractSet;
+import java.util.Collection;
+import java.util.ConcurrentModificationException;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.NoSuchElementException;
+import java.util.Set;
+
+import org.apache.cassandra.index.sasi.utils.trie.Cursor.Decision;
+
+/**
+ * This class implements the base PATRICIA algorithm and everything that
+ * is related to the {@link Map} interface.
+ */
+abstract class AbstractPatriciaTrie<K, V> extends AbstractTrie<K, V>
+{
+    private static final long serialVersionUID = -2303909182832019043L;
+
+    /**
+     * The root node of the {@link Trie}. 
+     */
+    final TrieEntry<K, V> root = new TrieEntry<>(null, null, -1);
+    
+    /**
+     * Each of these fields are initialized to contain an instance of the
+     * appropriate view the first time this view is requested. The views are
+     * stateless, so there's no reason to create more than one of each.
+     */
+    private transient volatile Set<K> keySet;
+    private transient volatile Collection<V> values;
+    private transient volatile Set<Map.Entry<K,V>> entrySet;
+    
+    /**
+     * The current size of the {@link Trie}
+     */
+    private int size = 0;
+    
+    /**
+     * The number of times this {@link Trie} has been modified.
+     * It's used to detect concurrent modifications and fail-fast
+     * the {@link Iterator}s.
+     */
+    transient int modCount = 0;
+    
+    public AbstractPatriciaTrie(KeyAnalyzer<? super K> keyAnalyzer)
+    {
+        super(keyAnalyzer);
+    }
+    
+    public AbstractPatriciaTrie(KeyAnalyzer<? super K> keyAnalyzer, Map<? 
extends K, ? extends V> m)
+    {
+        super(keyAnalyzer);
+        putAll(m);
+    }
+    
+    @Override
+    public void clear()
+    {
+        root.key = null;
+        root.bitIndex = -1;
+        root.value = null;
+        
+        root.parent = null;
+        root.left = root;
+        root.right = null;
+        root.predecessor = root;
+        
+        size = 0;
+        incrementModCount();
+    }
+    
+    @Override
+    public int size()
+    {
+        return size;
+    }
+   
+    /**
+     * A helper method to increment the {@link Trie} size
+     * and the modification counter.
+     */
+    void incrementSize()
+    {
+        size++;
+        incrementModCount();
+    }
+    
+    /**
+     * A helper method to decrement the {@link Trie} size
+     * and increment the modification counter.
+     */
+    void decrementSize()
+    {
+        size--;
+        incrementModCount();
+    }
+    
+    /**
+     * A helper method to increment the modification counter.
+     */
+    private void incrementModCount()
+    {
+        ++modCount;
+    }
+    
+    @Override
+    public V put(K key, V value)
+    {
+        if (key == null)
+            throw new NullPointerException("Key cannot be null");
+        
+        int lengthInBits = lengthInBits(key);
+        
+        // The only place to store a key with a length
+        // of zero bits is the root node
+        if (lengthInBits == 0)
+        {
+            if (root.isEmpty())
+                incrementSize();
+            else
+                incrementModCount();
+
+            return root.setKeyValue(key, value);
+        }
+        
+        TrieEntry<K, V> found = getNearestEntryForKey(key);
+        if (compareKeys(key, found.key))
+        {
+            if (found.isEmpty()) // <- must be the root
+                incrementSize();
+            else
+                incrementModCount();
+
+            return found.setKeyValue(key, value);
+        }
+        
+        int bitIndex = bitIndex(key, found.key);
+        if (!Tries.isOutOfBoundsIndex(bitIndex))
+        {
+            if (Tries.isValidBitIndex(bitIndex)) // in 99.999...9% the case
+            {
+                /* NEW KEY+VALUE TUPLE */
+                TrieEntry<K, V> t = new TrieEntry<>(key, value, bitIndex);
+                addEntry(t);
+                incrementSize();
+                return null;
+            }
+            else if (Tries.isNullBitKey(bitIndex))
+            {
+                // A bits of the Key are zero. The only place to
+                // store such a Key is the root Node!
+                
+                /* NULL BIT KEY */
+                if (root.isEmpty())
+                    incrementSize();
+                else
+                    incrementModCount();
+
+                return root.setKeyValue(key, value);
+                
+            }
+            else if (Tries.isEqualBitKey(bitIndex))
+            {
+                // This is a very special and rare case.
+                
+                /* REPLACE OLD KEY+VALUE */
+                if (found != root) {
+                    incrementModCount();
+                    return found.setKeyValue(key, value);
+                }
+            }
+        }
+        
+        throw new IndexOutOfBoundsException("Failed to put: " 
+                + key + " -> " + value + ", " + bitIndex);
+    }
+    
+    /**
+     * Adds the given {@link TrieEntry} to the {@link Trie}
+     */
+    TrieEntry<K, V> addEntry(TrieEntry<K, V> entry)
+    {
+        TrieEntry<K, V> current = root.left;
+        TrieEntry<K, V> path = root;
+
+        while(true)
+        {
+            if (current.bitIndex >= entry.bitIndex || current.bitIndex <= 
path.bitIndex)
+            {
+                entry.predecessor = entry;
+                
+                if (!isBitSet(entry.key, entry.bitIndex))
+                {
+                    entry.left = entry;
+                    entry.right = current;
+                }
+                else
+                {
+                    entry.left = current;
+                    entry.right = entry;
+                }
+               
+                entry.parent = path;
+                if (current.bitIndex >= entry.bitIndex)
+                    current.parent = entry;
+                
+                // if we inserted an uplink, set the predecessor on it
+                if (current.bitIndex <= path.bitIndex)
+                    current.predecessor = entry;
+         
+                if (path == root || !isBitSet(entry.key, path.bitIndex))
+                    path.left = entry;
+                else
+                    path.right = entry;
+                
+                return entry;
+            }
+                
+            path = current;
+            
+            current = !isBitSet(entry.key, current.bitIndex)
+                       ? current.left : current.right;
+        }
+    }
+    
+    @Override
+    public V get(Object k)
+    {
+        TrieEntry<K, V> entry = getEntry(k);
+        return entry != null ? entry.getValue() : null;
+    }
+
+    /**
+     * Returns the entry associated with the specified key in the
+     * AbstractPatriciaTrie.  Returns null if the map contains no mapping
+     * for this key.
+     * 
+     * This may throw ClassCastException if the object is not of type K.
+     */
+    TrieEntry<K,V> getEntry(Object k)
+    {
+        K key = Tries.cast(k);
+        if (key == null)
+            return null;
+        
+        TrieEntry<K,V> entry = getNearestEntryForKey(key);
+        return !entry.isEmpty() && compareKeys(key, entry.key) ? entry : null;
+    }
+    
+    @Override
+    public Map.Entry<K, V> select(K key)
+    {
+        Reference<Map.Entry<K, V>> reference = new Reference<>();
+        return !selectR(root.left, -1, key, reference) ? reference.get() : 
null;
+    }
+    
+    @Override
+    public Map.Entry<K,V> select(K key, Cursor<? super K, ? super V> cursor)
+    {
+        Reference<Map.Entry<K, V>> reference = new Reference<>();
+        selectR(root.left, -1, key, cursor, reference);
+        return reference.get();
+    }
+
+    /**
+     * This is equivalent to the other {@link #selectR(TrieEntry, int,
+     * K, Cursor, Reference)} method but without its overhead
+     * because we're selecting only one best matching Entry from the
+     * {@link Trie}.
+     */
+    private boolean selectR(TrieEntry<K, V> h, int bitIndex, final K key, 
final Reference<Map.Entry<K, V>> reference)
+    {
+        if (h.bitIndex <= bitIndex)
+        {
+            // If we hit the root Node and it is empty
+            // we have to look for an alternative best
+            // matching node.
+            if (!h.isEmpty())
+            {
+                reference.set(h);
+                return false;
+            }
+            return true;
+        }
+
+        if (!isBitSet(key, h.bitIndex))
+        {
+            if (selectR(h.left, h.bitIndex, key, reference))
+            {
+                return selectR(h.right, h.bitIndex, key, reference);
+            }
+        }
+        else
+        {
+            if (selectR(h.right, h.bitIndex, key, reference))
+            {
+                return selectR(h.left, h.bitIndex, key, reference);
+            }
+        }
+
+        return false;
+    }
+    
+    /**
+     * 
+     */
+    private boolean selectR(TrieEntry<K,V> h, int bitIndex, 
+                            final K key, final Cursor<? super K, ? super V> 
cursor,
+                            final Reference<Map.Entry<K, V>> reference)
+    {
+        if (h.bitIndex <= bitIndex)
+        {
+            if (!h.isEmpty())
+            {
+                Decision decision = cursor.select(h);
+                switch(decision)
+                {
+                    case REMOVE:
+                        throw new UnsupportedOperationException("Cannot remove 
during select");
+
+                    case EXIT:
+                        reference.set(h);
+                        return false; // exit
+
+                    case REMOVE_AND_EXIT:
+                        TrieEntry<K, V> entry = new TrieEntry<>(h.getKey(), 
h.getValue(), -1);
+                        reference.set(entry);
+                        removeEntry(h);
+                        return false;
+
+                    case CONTINUE:
+                        // fall through.
+                }
+            }
+
+            return true; // continue
+        }
+
+        if (!isBitSet(key, h.bitIndex))
+        {
+            if (selectR(h.left, h.bitIndex, key, cursor, reference))
+            {
+                return selectR(h.right, h.bitIndex, key, cursor, reference);
+            }
+        }
+        else
+        {
+            if (selectR(h.right, h.bitIndex, key, cursor, reference))
+            {
+                return selectR(h.left, h.bitIndex, key, cursor, reference);
+            }
+        }
+        
+        return false;
+    }
+    
+    @Override
+    public Map.Entry<K, V> traverse(Cursor<? super K, ? super V> cursor)
+    {
+        TrieEntry<K, V> entry = nextEntry(null);
+        while (entry != null)
+        {
+            TrieEntry<K, V> current = entry;
+            
+            Decision decision = cursor.select(current);
+            entry = nextEntry(current);
+            
+            switch(decision)
+            {
+                case EXIT:
+                    return current;
+
+                case REMOVE:
+                    removeEntry(current);
+                    break; // out of switch, stay in while loop
+
+                case REMOVE_AND_EXIT:
+                    Map.Entry<K, V> value = new TrieEntry<>(current.getKey(), 
current.getValue(), -1);
+                    removeEntry(current);
+                    return value;
+
+                case CONTINUE: // do nothing.
+            }
+        }
+        
+        return null;
+    }
+    
+    @Override
+    public boolean containsKey(Object k)
+    {
+        if (k == null)
+            return false;
+        
+        K key = Tries.cast(k);
+        TrieEntry<K, V> entry = getNearestEntryForKey(key);
+        return !entry.isEmpty() && compareKeys(key, entry.key);
+    }
+    
+    @Override
+    public Set<Map.Entry<K,V>> entrySet()
+    {
+        if (entrySet == null)
+            entrySet = new EntrySet();
+
+        return entrySet;
+    }
+    
+    @Override
+    public Set<K> keySet()
+    {
+        if (keySet == null)
+            keySet = new KeySet();
+        return keySet;
+    }
+    
+    @Override
+    public Collection<V> values()
+    {
+        if (values == null)
+            values = new Values();
+        return values;
+    }
+    
+    /**
+     * {@inheritDoc}
+     * 
+     * @throws ClassCastException if provided key is of an incompatible type 
+     */
+    @Override
+    public V remove(Object k)
+    {
+        if (k == null)
+            return null;
+        
+        K key = Tries.cast(k);
+        TrieEntry<K, V> current = root.left;
+        TrieEntry<K, V> path = root;
+        while (true)
+        {
+            if (current.bitIndex <= path.bitIndex)
+            {
+                if (!current.isEmpty() && compareKeys(key, current.key))
+                {
+                    return removeEntry(current);
+                }
+                else
+                {
+                    return null;
+                }
+            }
+            
+            path = current;
+            current = !isBitSet(key, current.bitIndex) ? current.left : 
current.right;
+        }
+    }
+    
+    /**
+     * Returns the nearest entry for a given key.  This is useful
+     * for finding knowing if a given key exists (and finding the value
+     * for it), or for inserting the key.
+     * 
+     * The actual get implementation. This is very similar to
+     * selectR but with the exception that it might return the
+     * root Entry even if it's empty.
+     */
+    TrieEntry<K, V> getNearestEntryForKey(K key)
+    {
+        TrieEntry<K, V> current = root.left;
+        TrieEntry<K, V> path = root;
+
+        while(true)
+        {
+            if (current.bitIndex <= path.bitIndex)
+                return current;
+            
+            path = current;
+            current = !isBitSet(key, current.bitIndex) ? current.left : 
current.right;
+        }
+    }
+    
+    /**
+     * Removes a single entry from the {@link Trie}.
+     * 
+     * If we found a Key (Entry h) then figure out if it's
+     * an internal (hard to remove) or external Entry (easy 
+     * to remove)
+     */
+    V removeEntry(TrieEntry<K, V> h)
+    {
+        if (h != root)
+        {
+            if (h.isInternalNode())
+            {
+                removeInternalEntry(h);
+            }
+            else
+            {
+                removeExternalEntry(h);
+            }
+        }
+        
+        decrementSize();
+        return h.setKeyValue(null, null);
+    }
+    
+    /**
+     * Removes an external entry from the {@link Trie}.
+     * 
+     * If it's an external Entry then just remove it.
+     * This is very easy and straight forward.
+     */
+    private void removeExternalEntry(TrieEntry<K, V> h)
+    {
+        if (h == root)
+        {
+            throw new IllegalArgumentException("Cannot delete root Entry!");
+        }
+        else if (!h.isExternalNode())
+        {
+            throw new IllegalArgumentException(h + " is not an external 
Entry!");
+        } 
+        
+        TrieEntry<K, V> parent = h.parent;
+        TrieEntry<K, V> child = (h.left == h) ? h.right : h.left;
+        
+        if (parent.left == h)
+        {
+            parent.left = child;
+        }
+        else
+        {
+            parent.right = child;
+        }
+        
+        // either the parent is changing, or the predecessor is changing.
+        if (child.bitIndex > parent.bitIndex)
+        {
+            child.parent = parent;
+        }
+        else
+        {
+            child.predecessor = parent;
+        }
+        
+    }
+    
+    /**
+     * Removes an internal entry from the {@link Trie}.
+     * 
+     * If it's an internal Entry then "good luck" with understanding
+     * this code. The Idea is essentially that Entry p takes Entry h's
+     * place in the trie which requires some re-wiring.
+     */
+    private void removeInternalEntry(TrieEntry<K, V> h)
+    {
+        if (h == root)
+        {
+            throw new IllegalArgumentException("Cannot delete root Entry!");
+        }
+        else if (!h.isInternalNode())
+        {
+            throw new IllegalArgumentException(h + " is not an internal 
Entry!");
+        } 
+        
+        TrieEntry<K, V> p = h.predecessor;
+        
+        // Set P's bitIndex
+        p.bitIndex = h.bitIndex;
+        
+        // Fix P's parent, predecessor and child Nodes
+        {
+            TrieEntry<K, V> parent = p.parent;
+            TrieEntry<K, V> child = (p.left == h) ? p.right : p.left;
+            
+            // if it was looping to itself previously,
+            // it will now be pointed from it's parent
+            // (if we aren't removing it's parent --
+            //  in that case, it remains looping to itself).
+            // otherwise, it will continue to have the same
+            // predecessor.
+            if (p.predecessor == p && p.parent != h)
+                p.predecessor = p.parent;
+            
+            if (parent.left == p)
+            {
+                parent.left = child;
+            }
+            else
+            {
+                parent.right = child;
+            }
+            
+            if (child.bitIndex > parent.bitIndex)
+            {
+                child.parent = parent;
+            }
+        }
+        
+        // Fix H's parent and child Nodes
+        {         
+            // If H is a parent of its left and right child 
+            // then change them to P
+            if (h.left.parent == h)
+                h.left.parent = p;
+
+            if (h.right.parent == h)
+                h.right.parent = p;
+            
+            // Change H's parent
+            if (h.parent.left == h)
+            {
+                h.parent.left = p;
+            }
+            else
+            {
+                h.parent.right = p;
+            }
+        }
+        
+        // Copy the remaining fields from H to P
+        //p.bitIndex = h.bitIndex;
+        p.parent = h.parent;
+        p.left = h.left;
+        p.right = h.right;
+        
+        // Make sure that if h was pointing to any uplinks,
+        // p now points to them.
+        if (isValidUplink(p.left, p))
+            p.left.predecessor = p;
+        
+        if (isValidUplink(p.right, p))
+            p.right.predecessor = p;
+    }
+    
+    /**
+     * Returns the entry lexicographically after the given entry.
+     * If the given entry is null, returns the first node.
+     */
+    TrieEntry<K, V> nextEntry(TrieEntry<K, V> node)
+    {
+        return (node == null) ? firstEntry() : nextEntryImpl(node.predecessor, 
node, null);
+    }
+    
+    /**
+     * Scans for the next node, starting at the specified point, and using 
'previous'
+     * as a hint that the last node we returned was 'previous' (so we know not 
to return
+     * it again).  If 'tree' is non-null, this will limit the search to the 
given tree.
+     * 
+     * The basic premise is that each iteration can follow the following steps:
+     * 
+     * 1) Scan all the way to the left.
+     *   a) If we already started from this node last time, proceed to Step 2.
+     *   b) If a valid uplink is found, use it.
+     *   c) If the result is an empty node (root not set), break the scan.
+     *   d) If we already returned the left node, break the scan.
+     *   
+     * 2) Check the right.
+     *   a) If we already returned the right node, proceed to Step 3.
+     *   b) If it is a valid uplink, use it.
+     *   c) Do Step 1 from the right node.
+     *   
+     * 3) Back up through the parents until we encounter find a parent
+     *    that we're not the right child of.
+     *    
+     * 4) If there's no right child of that parent, the iteration is finished.
+     *    Otherwise continue to Step 5.
+     * 
+     * 5) Check to see if the right child is a valid uplink.
+     *    a) If we already returned that child, proceed to Step 6.
+     *       Otherwise, use it.
+     *    
+     * 6) If the right child of the parent is the parent itself, we've
+     *    already found & returned the end of the Trie, so exit.
+     *    
+     * 7) Do Step 1 on the parent's right child.
+     */
+    TrieEntry<K, V> nextEntryImpl(TrieEntry<K, V> start, TrieEntry<K, V> 
previous, TrieEntry<K, V> tree)
+    {
+        TrieEntry<K, V> current = start;
+
+        // Only look at the left if this was a recursive or
+        // the first check, otherwise we know we've already looked
+        // at the left.
+        if (previous == null || start != previous.predecessor)
+        {
+            while (!current.left.isEmpty())
+            {
+                // stop traversing if we've already
+                // returned the left of this node.
+                if (previous == current.left)
+                    break;
+                
+                if (isValidUplink(current.left, current))
+                    return current.left;
+                
+                current = current.left;
+            }
+        }
+        
+        // If there's no data at all, exit.
+        if (current.isEmpty())
+            return null;
+        
+        // If we've already returned the left,
+        // and the immediate right is null,
+        // there's only one entry in the Trie
+        // which is stored at the root.
+        //
+        //  / ("")   <-- root
+        //  \_/  \
+        //       null <-- 'current'
+        //
+        if (current.right == null)
+            return null;
+        
+        // If nothing valid on the left, try the right.
+        if (previous != current.right)
+        {
+            // See if it immediately is valid.
+            if (isValidUplink(current.right, current))
+                return current.right;
+            
+            // Must search on the right's side if it wasn't initially valid.
+            return nextEntryImpl(current.right, previous, tree);
+        }
+        
+        // Neither left nor right are valid, find the first parent
+        // whose child did not come from the right & traverse it.
+        while (current == current.parent.right)
+        {
+            // If we're going to traverse to above the subtree, stop.
+            if (current == tree)
+                return null;
+            
+            current = current.parent;
+        }
+
+        // If we're on the top of the subtree, we can't go any higher.
+        if (current == tree)
+            return null;
+        
+        // If there's no right, the parent must be root, so we're done.
+        if (current.parent.right == null)
+            return null;
+        
+        // If the parent's right points to itself, we've found one.
+        if (previous != current.parent.right && 
isValidUplink(current.parent.right, current.parent))
+            return current.parent.right;
+        
+        // If the parent's right is itself, there can't be any more nodes.
+        if (current.parent.right == current.parent)
+            return null;
+        
+        // We need to traverse down the parent's right's path.
+        return nextEntryImpl(current.parent.right, previous, tree);
+    }
+    
+    /**
+     * Returns the first entry the {@link Trie} is storing.
+     * 
+     * This is implemented by going always to the left until
+     * we encounter a valid uplink. That uplink is the first key.
+     */
+    TrieEntry<K, V> firstEntry()
+    {
+        // if Trie is empty, no first node.
+        return isEmpty() ? null : followLeft(root);
+    }
+    
+    /** 
+     * Goes left through the tree until it finds a valid node. 
+     */
+    TrieEntry<K, V> followLeft(TrieEntry<K, V> node)
+    {
+        while(true)
+        {
+            TrieEntry<K, V> child = node.left;
+            // if we hit root and it didn't have a node, go right instead.
+            if (child.isEmpty())
+                child = node.right;
+            
+            if (child.bitIndex <= node.bitIndex)
+                return child;
+            
+            node = child;
+        }
+    }
+    
+    /** 
+     * Returns true if 'next' is a valid uplink coming from 'from'. 
+     */
+    static boolean isValidUplink(TrieEntry<?, ?> next, TrieEntry<?, ?> from)
+    {
+        return next != null && next.bitIndex <= from.bitIndex && 
!next.isEmpty();
+    }
+    
+    /**
+     * A {@link Reference} allows us to return something through a Method's 
+     * argument list. An alternative would be to an Array with a length of 
+     * one (1) but that leads to compiler warnings. Computationally and memory
+     * wise there's no difference (except for the need to load the 
+     * {@link Reference} Class but that happens only once).
+     */
+    private static class Reference<E>
+    {
+        
+        private E item;
+        
+        public void set(E item)
+        {
+            this.item = item;
+        }
+        
+        public E get()
+        {
+            return item;
+        }
+    }
+    
+    /**
+     *  A {@link Trie} is a set of {@link TrieEntry} nodes
+     */
+    static class TrieEntry<K,V> extends BasicEntry<K, V>
+    {
+        
+        private static final long serialVersionUID = 4596023148184140013L;
+        
+        /** The index this entry is comparing. */
+        protected int bitIndex;
+        
+        /** The parent of this entry. */
+        protected TrieEntry<K,V> parent;
+        
+        /** The left child of this entry. */
+        protected TrieEntry<K,V> left;
+        
+        /** The right child of this entry. */
+        protected TrieEntry<K,V> right;
+        
+        /** The entry who uplinks to this entry. */ 
+        protected TrieEntry<K,V> predecessor;
+        
+        public TrieEntry(K key, V value, int bitIndex)
+        {
+            super(key, value);
+            
+            this.bitIndex = bitIndex;
+            
+            this.parent = null;
+            this.left = this;
+            this.right = null;
+            this.predecessor = this;
+        }
+        
+        /**
+         * Whether or not the entry is storing a key.
+         * Only the root can potentially be empty, all other
+         * nodes must have a key.
+         */
+        public boolean isEmpty()
+        {
+            return key == null;
+        }
+        
+        /** 
+         * Neither the left nor right child is a loopback 
+         */
+        public boolean isInternalNode()
+        {
+            return left != this && right != this;
+        }
+        
+        /** 
+         * Either the left or right child is a loopback 
+         */
+        public boolean isExternalNode()
+        {
+            return !isInternalNode();
+        }
+    }
+    
+
+    /**
+     * This is a entry set view of the {@link Trie} as returned 
+     * by {@link Map#entrySet()}
+     */
+    private class EntrySet extends AbstractSet<Map.Entry<K,V>>
+    {
+        @Override
+        public Iterator<Map.Entry<K,V>> iterator()
+        {
+            return new EntryIterator();
+        }
+        
+        @Override
+        public boolean contains(Object o)
+        {
+            if (!(o instanceof Map.Entry))
+                return false;
+            
+            TrieEntry<K,V> candidate = getEntry(((Map.Entry<?, ?>)o).getKey());
+            return candidate != null && candidate.equals(o);
+        }
+        
+        @Override
+        public boolean remove(Object o)
+        {
+            int size = size();
+            AbstractPatriciaTrie.this.remove(o);
+            return size != size();
+        }
+        
+        @Override
+        public int size()
+        {
+            return AbstractPatriciaTrie.this.size();
+        }
+        
+        @Override
+        public void clear()
+        {
+            AbstractPatriciaTrie.this.clear();
+        }
+        
+        /**
+         * An {@link Iterator} that returns {@link Entry} Objects
+         */
+        private class EntryIterator extends TrieIterator<Map.Entry<K,V>>
+        {
+            @Override
+            public Map.Entry<K,V> next()
+            {
+                return nextEntry();
+            }
+        }
+    }
+    
+    /**
+     * This is a key set view of the {@link Trie} as returned 
+     * by {@link Map#keySet()}
+     */
+    private class KeySet extends AbstractSet<K>
+    {
+        @Override
+        public Iterator<K> iterator()
+        {
+            return new KeyIterator();
+        }
+        
+        @Override
+        public int size()
+        {
+            return AbstractPatriciaTrie.this.size();
+        }
+        
+        @Override
+        public boolean contains(Object o)
+        {
+            return containsKey(o);
+        }
+        
+        @Override
+        public boolean remove(Object o)
+        {
+            int size = size();
+            AbstractPatriciaTrie.this.remove(o);
+            return size != size();
+        }
+        
+        @Override
+        public void clear()
+        {
+            AbstractPatriciaTrie.this.clear();
+        }
+        
+        /**
+         * An {@link Iterator} that returns Key Objects
+         */
+        private class KeyIterator extends TrieIterator<K>
+        {
+            @Override
+            public K next()
+            {
+                return nextEntry().getKey();
+            }
+        }
+    }
+    
+    /**
+     * This is a value view of the {@link Trie} as returned 
+     * by {@link Map#values()}
+     */
+    private class Values extends AbstractCollection<V>
+    {
+        @Override
+        public Iterator<V> iterator()
+        {
+            return new ValueIterator();
+        }
+        
+        @Override
+        public int size()
+        {
+            return AbstractPatriciaTrie.this.size();
+        }
+        
+        @Override
+        public boolean contains(Object o)
+        {
+            return containsValue(o);
+        }
+        
+        @Override
+        public void clear()
+        {
+            AbstractPatriciaTrie.this.clear();
+        }
+        
+        @Override
+        public boolean remove(Object o)
+        {
+            for (Iterator<V> it = iterator(); it.hasNext(); )
+            {
+                V value = it.next();
+                if (Tries.areEqual(value, o))
+                {
+                    it.remove();
+                    return true;
+                }
+            }
+            return false;
+        }
+        
+        /**
+         * An {@link Iterator} that returns Value Objects
+         */
+        private class ValueIterator extends TrieIterator<V>
+        {
+            @Override
+            public V next()
+            {
+                return nextEntry().getValue();
+            }
+        }
+    }
+    
+    /** 
+     * An iterator for the entries. 
+     */
+    abstract class TrieIterator<E> implements Iterator<E>
+    {
+        /**
+         * For fast-fail
+         */
+        protected int expectedModCount = AbstractPatriciaTrie.this.modCount;
+        
+        protected TrieEntry<K, V> next; // the next node to return
+        protected TrieEntry<K, V> current; // the current entry we're on
+        
+        /**
+         * Starts iteration from the root
+         */
+        protected TrieIterator()
+        {
+            next = AbstractPatriciaTrie.this.nextEntry(null);
+        }
+        
+        /**
+         * Starts iteration at the given entry
+         */
+        protected TrieIterator(TrieEntry<K, V> firstEntry)
+        {
+            next = firstEntry;
+        }
+        
+        /**
+         * Returns the next {@link TrieEntry}
+         */
+        protected TrieEntry<K,V> nextEntry()
+        {
+            if (expectedModCount != AbstractPatriciaTrie.this.modCount)
+                throw new ConcurrentModificationException();
+            
+            TrieEntry<K,V> e = next;
+            if (e == null)
+                throw new NoSuchElementException();
+            
+            next = findNext(e);
+            current = e;
+            return e;
+        }
+        
+        /**
+         * @see PatriciaTrie#nextEntry(TrieEntry)
+         */
+        protected TrieEntry<K, V> findNext(TrieEntry<K, V> prior)
+        {
+            return AbstractPatriciaTrie.this.nextEntry(prior);
+        }
+        
+        @Override
+        public boolean hasNext()
+        {
+            return next != null;
+        }
+        
+        @Override
+        public void remove()
+        {
+            if (current == null)
+                throw new IllegalStateException();
+            
+            if (expectedModCount != AbstractPatriciaTrie.this.modCount)
+                throw new ConcurrentModificationException();
+            
+            TrieEntry<K, V> node = current;
+            current = null;
+            AbstractPatriciaTrie.this.removeEntry(node);
+            
+            expectedModCount = AbstractPatriciaTrie.this.modCount;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/cassandra/blob/72790dc8/src/java/org/apache/cassandra/index/sasi/utils/trie/AbstractTrie.java
----------------------------------------------------------------------
diff --git 
a/src/java/org/apache/cassandra/index/sasi/utils/trie/AbstractTrie.java 
b/src/java/org/apache/cassandra/index/sasi/utils/trie/AbstractTrie.java
new file mode 100644
index 0000000..0bf9c20
--- /dev/null
+++ b/src/java/org/apache/cassandra/index/sasi/utils/trie/AbstractTrie.java
@@ -0,0 +1,230 @@
+/*
+ * Copyright 2005-2010 Roger Kapsi, Sam Berlin
+ *
+ *   Licensed under the Apache License, Version 2.0 (the "License");
+ *   you may not use this file except in compliance with the License.
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software
+ *   distributed under the License is distributed on an "AS IS" BASIS,
+ *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *   See the License for the specific language governing permissions and
+ *   limitations under the License.
+ */
+
+package org.apache.cassandra.index.sasi.utils.trie;
+
+import java.io.Serializable;
+import java.util.AbstractMap;
+import java.util.Map;
+
+/**
+ * This class is taken from https://github.com/rkapsi/patricia-trie (v0.6), 
and slightly modified
+ * to correspond to Cassandra code style, as the only Patricia Trie 
implementation,
+ * which supports pluggable key comparators (e.g. commons-collections 
PatriciaTrie (which is based
+ * on rkapsi/patricia-trie project) only supports String keys)
+ * but unfortunately is not deployed to the maven central as a downloadable 
artifact.
+ */
+
+/**
+ * This class provides some basic {@link Trie} functionality and 
+ * utility methods for actual {@link Trie} implementations.
+ */
+abstract class AbstractTrie<K, V> extends AbstractMap<K, V> implements 
Serializable, Trie<K, V>
+{
+    private static final long serialVersionUID = -6358111100045408883L;
+    
+    /**
+     * The {@link KeyAnalyzer} that's being used to build the 
+     * PATRICIA {@link Trie}
+     */
+    protected final KeyAnalyzer<? super K> keyAnalyzer;
+    
+    /** 
+     * Constructs a new {@link Trie} using the given {@link KeyAnalyzer} 
+     */
+    public AbstractTrie(KeyAnalyzer<? super K> keyAnalyzer)
+    {
+        this.keyAnalyzer = Tries.notNull(keyAnalyzer, "keyAnalyzer");
+    }
+    
+    @Override
+    public K selectKey(K key)
+    {
+        Map.Entry<K, V> entry = select(key);
+        return entry != null ? entry.getKey() : null;
+    }
+    
+    @Override
+    public V selectValue(K key)
+    {
+        Map.Entry<K, V> entry = select(key);
+        return entry != null ? entry.getValue() : null;
+    }
+        
+    @Override
+    public String toString()
+    {
+        StringBuilder buffer = new StringBuilder();
+        buffer.append("Trie[").append(size()).append("]={\n");
+        for (Map.Entry<K, V> entry : entrySet()) {
+            buffer.append("  ").append(entry).append("\n");
+        }
+        buffer.append("}\n");
+        return buffer.toString();
+    }
+    
+    /**
+     * Returns the length of the given key in bits
+     * 
+     * @see KeyAnalyzer#lengthInBits(Object)
+     */
+    final int lengthInBits(K key)
+    {
+        return key == null ? 0 : keyAnalyzer.lengthInBits(key);
+    }
+    
+    /**
+     * Returns whether or not the given bit on the 
+     * key is set or false if the key is null.
+     * 
+     * @see KeyAnalyzer#isBitSet(Object, int)
+     */
+    final boolean isBitSet(K key, int bitIndex)
+    {
+        return key != null && keyAnalyzer.isBitSet(key, bitIndex);
+    }
+    
+    /**
+     * Utility method for calling {@link KeyAnalyzer#bitIndex(Object, Object)}
+     */
+    final int bitIndex(K key, K otherKey)
+    {
+        if (key != null && otherKey != null)
+        {
+            return keyAnalyzer.bitIndex(key, otherKey);            
+        }
+        else if (key != null)
+        {
+            return bitIndex(key);
+        }
+        else if (otherKey != null)
+        {
+            return bitIndex(otherKey);
+        }
+        
+        return KeyAnalyzer.NULL_BIT_KEY;
+    }
+    
+    private int bitIndex(K key)
+    {
+        int lengthInBits = lengthInBits(key);
+        for (int i = 0; i < lengthInBits; i++)
+        {
+            if (isBitSet(key, i))
+                return i;
+        }
+        
+        return KeyAnalyzer.NULL_BIT_KEY;
+    }
+    
+    /**
+     * An utility method for calling {@link KeyAnalyzer#compare(Object, 
Object)}
+     */
+    final boolean compareKeys(K key, K other)
+    {
+        if (key == null)
+        {
+            return (other == null);
+        }
+        else if (other == null)
+        {
+            return false;
+        }
+        
+        return keyAnalyzer.compare(key, other) == 0;
+    }
+    
+    /**
+     * A basic implementation of {@link Entry}
+     */
+    abstract static class BasicEntry<K, V> implements Map.Entry<K, V>, 
Serializable
+    {
+        private static final long serialVersionUID = -944364551314110330L;
+
+        protected K key;
+        
+        protected V value;
+        
+        private transient int hashCode = 0;
+        
+        public BasicEntry(K key, V value)
+        {
+            this.key = key;
+            this.value = value;
+        }
+        
+        /**
+         * Replaces the current key and value with the provided
+         * key &amp; value
+         */
+        public V setKeyValue(K key, V value)
+        {
+            this.key = key;
+            this.hashCode = 0;
+            return setValue(value);
+        }
+        
+        @Override
+        public K getKey()
+        {
+            return key;
+        }
+        
+        @Override
+        public V getValue()
+        {
+            return value;
+        }
+        
+        @Override
+        public V setValue(V value)
+        {
+            V previous = this.value;
+            this.value = value;
+            return previous;
+        }
+        
+        @Override
+        public int hashCode()
+        {
+            if (hashCode == 0)
+                hashCode = (key != null ? key.hashCode() : 0);
+            return hashCode;
+        }
+        
+        @Override
+        public boolean equals(Object o)
+        {
+            if (o == this)
+            {
+                return true;
+            }
+            else if (!(o instanceof Map.Entry<?, ?>))
+            {
+                return false;
+            }
+            
+            Map.Entry<?, ?> other = (Map.Entry<?, ?>)o;
+            return Tries.areEqual(key, other.getKey()) && 
Tries.areEqual(value, other.getValue());
+        }
+        
+        @Override
+        public String toString()
+        {
+            return key + "=" + value;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/cassandra/blob/72790dc8/src/java/org/apache/cassandra/index/sasi/utils/trie/Cursor.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/utils/trie/Cursor.java 
b/src/java/org/apache/cassandra/index/sasi/utils/trie/Cursor.java
new file mode 100644
index 0000000..513fae0
--- /dev/null
+++ b/src/java/org/apache/cassandra/index/sasi/utils/trie/Cursor.java
@@ -0,0 +1,83 @@
+/*
+ * Copyright 2005-2010 Roger Kapsi, Sam Berlin
+ *
+ *   Licensed under the Apache License, Version 2.0 (the "License");
+ *   you may not use this file except in compliance with the License.
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software
+ *   distributed under the License is distributed on an "AS IS" BASIS,
+ *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *   See the License for the specific language governing permissions and
+ *   limitations under the License.
+ */
+
+package org.apache.cassandra.index.sasi.utils.trie;
+
+import java.util.Map;
+import java.util.Map.Entry;
+
+/**
+ * This class is taken from https://github.com/rkapsi/patricia-trie (v0.6), 
and slightly modified
+ * to correspond to Cassandra code style, as the only Patricia Trie 
implementation,
+ * which supports pluggable key comparators (e.g. commons-collections 
PatriciaTrie (which is based
+ * on rkapsi/patricia-trie project) only supports String keys)
+ * but unfortunately is not deployed to the maven central as a downloadable 
artifact.
+ */
+
+/**
+ * A {@link Cursor} can be used to traverse a {@link Trie}, visit each node 
+ * step by step and make {@link Decision}s on each step how to continue with 
+ * traversing the {@link Trie}.
+ */
+public interface Cursor<K, V>
+{
+    
+    /**
+     * The {@link Decision} tells the {@link Cursor} what to do on each step 
+     * while traversing the {@link Trie}.
+     * 
+     * NOTE: Not all operations that work with a {@link Cursor} support all 
+     * {@link Decision} types
+     */
+    enum Decision
+    {
+        
+        /**
+         * Exit the traverse operation
+         */
+        EXIT, 
+        
+        /**
+         * Continue with the traverse operation
+         */
+        CONTINUE, 
+        
+        /**
+         * Remove the previously returned element
+         * from the {@link Trie} and continue
+         */
+        REMOVE, 
+        
+        /**
+         * Remove the previously returned element
+         * from the {@link Trie} and exit from the
+         * traverse operation
+         */
+        REMOVE_AND_EXIT
+    }
+    
+    /**
+     * Called for each {@link Entry} in the {@link Trie}. Return 
+     * {@link Decision#EXIT} to finish the {@link Trie} operation,
+     * {@link Decision#CONTINUE} to go to the next {@link Entry},
+     * {@link Decision#REMOVE} to remove the {@link Entry} and
+     * continue iterating or {@link Decision#REMOVE_AND_EXIT} to
+     * remove the {@link Entry} and stop iterating.
+     * 
+     * Note: Not all operations support {@link Decision#REMOVE}.
+     */
+    Decision select(Map.Entry<? extends K, ? extends V> entry);
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/cassandra/blob/72790dc8/src/java/org/apache/cassandra/index/sasi/utils/trie/KeyAnalyzer.java
----------------------------------------------------------------------
diff --git 
a/src/java/org/apache/cassandra/index/sasi/utils/trie/KeyAnalyzer.java 
b/src/java/org/apache/cassandra/index/sasi/utils/trie/KeyAnalyzer.java
new file mode 100644
index 0000000..9cab4ae
--- /dev/null
+++ b/src/java/org/apache/cassandra/index/sasi/utils/trie/KeyAnalyzer.java
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2010 Roger Kapsi
+ *
+ *   Licensed under the Apache License, Version 2.0 (the "License");
+ *   you may not use this file except in compliance with the License.
+ *   You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *   Unless required by applicable law or agreed to in writing, software
+ *   distributed under the License is distributed on an "AS IS" BASIS,
+ *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *   See the License for the specific language governing permissions and
+ *   limitations under the License.
+ */
+
+package org.apache.cassandra.index.sasi.utils.trie;
+
+import java.util.Comparator;
+
+/**
+ * This class is taken from https://github.com/rkapsi/patricia-trie (v0.6), 
and slightly modified
+ * to correspond to Cassandra code style, as the only Patricia Trie 
implementation,
+ * which supports pluggable key comparators (e.g. commons-collections 
PatriciaTrie (which is based
+ * on rkapsi/patricia-trie project) only supports String keys)
+ * but unfortunately is not deployed to the maven central as a downloadable 
artifact.
+ */
+
+/**
+ * The {@link KeyAnalyzer} provides bit-level access to keys
+ * for the {@link PatriciaTrie}.
+ */
+public interface KeyAnalyzer<K> extends Comparator<K>
+{
+    /**
+     * Returned by {@link #bitIndex(Object, Object)} if a key's
+     * bits were all zero (0).
+     */
+    int NULL_BIT_KEY = -1;
+    
+    /** 
+     * Returned by {@link #bitIndex(Object, Object)} if a the
+     * bits of two keys were all equal.
+     */
+    int EQUAL_BIT_KEY = -2;
+    
+    /**
+     * Returned by {@link #bitIndex(Object, Object)} if a keys 
+     * indices are out of bounds.
+     */
+    int OUT_OF_BOUNDS_BIT_KEY = -3;
+    
+    /**
+     * Returns the key's length in bits.
+     */
+    int lengthInBits(K key);
+    
+    /**
+     * Returns {@code true} if a key's bit it set at the given index.
+     */
+    boolean isBitSet(K key, int bitIndex);
+    
+    /**
+     * Returns the index of the first bit that is different in the two keys.
+     */
+    int bitIndex(K key, K otherKey);
+    
+    /**
+     * Returns {@code true} if the second argument is a 
+     * prefix of the first argument.
+     */
+    boolean isPrefix(K key, K prefix);
+}

[08/14] cassandra git commit: Integrate SASI index into Cassandra

Reply via email to