Improve SASI range iterator efficiency on intersection with an empty range.
Patch by Corentin Chary; reviewed by Alex Petrov for CASSANDRA-12915. Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/2c111d15 Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/2c111d15 Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/2c111d15 Branch: refs/heads/trunk Commit: 2c111d15bb080283b9b98d48fab4bcf4db515b5a Parents: 9efa682 Author: Alex Petrov <oleksandr.pet...@gmail.com> Authored: Fri Mar 10 13:28:16 2017 +0100 Committer: Alex Petrov <oleksandr.pet...@gmail.com> Committed: Fri Mar 10 13:31:15 2017 +0100 ---------------------------------------------------------------------- CHANGES.txt | 1 + .../cassandra/index/sasi/TermIterator.java | 2 +- .../index/sasi/plan/QueryController.java | 3 - .../sasi/utils/RangeIntersectionIterator.java | 9 ++- .../index/sasi/utils/RangeIterator.java | 79 +++++++++++++++----- .../index/sasi/utils/RangeUnionIterator.java | 9 ++- .../cassandra/index/sasi/SASIIndexTest.java | 6 +- .../index/sasi/utils/LongIteratorTest.java | 56 ++++++++++++++ .../utils/RangeIntersectionIteratorTest.java | 78 ++++++++++++++++++- .../sasi/utils/RangeUnionIteratorTest.java | 72 +++++++++++++++++- 10 files changed, 283 insertions(+), 32 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/2c111d15/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index acef1c2..302a028 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 3.11.0 + * Improve SASI range iterator efficiency on intersection with an empty range (CASSANDRA-12915). * Fix equality comparisons of columns using the duration type (CASSANDRA-13174) * Obfuscate password in stress-graphs (CASSANDRA-12233) * Move to FastThreadLocalThread and FastThreadLocal (CASSANDRA-13034) http://git-wip-us.apache.org/repos/asf/cassandra/blob/2c111d15/src/java/org/apache/cassandra/index/sasi/TermIterator.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/TermIterator.java b/src/java/org/apache/cassandra/index/sasi/TermIterator.java index 03dea18..85f81b0 100644 --- a/src/java/org/apache/cassandra/index/sasi/TermIterator.java +++ b/src/java/org/apache/cassandra/index/sasi/TermIterator.java @@ -157,7 +157,7 @@ public class TermIterator extends RangeIterator<Long, Token> e.checkpoint(); RangeIterator<Long, Token> ranges = RangeUnionIterator.build(tokens); - return ranges == null ? null : new TermIterator(e, ranges, referencedIndexes); + return new TermIterator(e, ranges, referencedIndexes); } catch (Throwable ex) { http://git-wip-us.apache.org/repos/asf/cassandra/blob/2c111d15/src/java/org/apache/cassandra/index/sasi/plan/QueryController.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/plan/QueryController.java b/src/java/org/apache/cassandra/index/sasi/plan/QueryController.java index 155cd4f..22fca68 100644 --- a/src/java/org/apache/cassandra/index/sasi/plan/QueryController.java +++ b/src/java/org/apache/cassandra/index/sasi/plan/QueryController.java @@ -144,9 +144,6 @@ public class QueryController @SuppressWarnings("resource") // RangeIterators are closed by releaseIndexes RangeIterator<Long, Token> index = TermIterator.build(e.getKey(), e.getValue()); - if (index == null) - continue; - builder.add(index); perIndexUnions.add(index); } http://git-wip-us.apache.org/repos/asf/cassandra/blob/2c111d15/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java b/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java index 02d9527..bd8c725 100644 --- a/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java +++ b/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java @@ -59,10 +59,13 @@ public class RangeIntersectionIterator protected RangeIterator<K, D> buildIterator() { - // if the range is disjoint we can simply return empty - // iterator of any type, because it's not going to produce any results. + // if the range is disjoint or we have an intersection with an empty set, + // we can simply return an empty iterator, because it's not going to produce any results. if (statistics.isDisjoint()) - return new BounceIntersectionIterator<>(statistics, new PriorityQueue<RangeIterator<K, D>>(1)); + return new EmptyRangeIterator<>(); + + if (rangeCount() == 1) + return ranges.poll(); switch (strategy) { http://git-wip-us.apache.org/repos/asf/cassandra/blob/2c111d15/src/java/org/apache/cassandra/index/sasi/utils/RangeIterator.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/utils/RangeIterator.java b/src/java/org/apache/cassandra/index/sasi/utils/RangeIterator.java index 1b5aee4..06c5a6a 100644 --- a/src/java/org/apache/cassandra/index/sasi/utils/RangeIterator.java +++ b/src/java/org/apache/cassandra/index/sasi/utils/RangeIterator.java @@ -42,6 +42,9 @@ public abstract class RangeIterator<K extends Comparable<K>, T extends CombinedV public RangeIterator(K min, K max, long count) { + if (min == null || max == null || count == 0) + assert min == null && max == null && (count == 0 || count == -1); + this.min = min; this.current = min; this.max = max; @@ -148,10 +151,11 @@ public abstract class RangeIterator<K extends Comparable<K>, T extends CombinedV public Builder<K, D> add(RangeIterator<K, D> range) { - if (range == null || range.getMinimum() == null || range.getMaximum() == null) + if (range == null) return this; - ranges.add(range); + if (range.getCount() > 0) + ranges.add(range); statistics.update(range); return this; @@ -168,17 +172,18 @@ public abstract class RangeIterator<K extends Comparable<K>, T extends CombinedV public final RangeIterator<K, D> build() { - switch (rangeCount()) - { - case 0: - return null; - - case 1: - return ranges.poll(); + if (rangeCount() == 0) + return new EmptyRangeIterator<>(); + else + return buildIterator(); + } - default: - return buildIterator(); - } + public static class EmptyRangeIterator<K extends Comparable<K>, D extends CombinedValue<K>> extends RangeIterator<K, D> + { + EmptyRangeIterator() { super(null, null, 0); } + public D computeNext() { return endOfData(); } + protected void performSkipTo(K nextToken) { } + public void close() { } } protected abstract RangeIterator<K, D> buildIterator(); @@ -197,7 +202,7 @@ public abstract class RangeIterator<K extends Comparable<K>, T extends CombinedV // tracks if all of the added ranges overlap, which is useful in case of intersection, // as it gives direct answer as to such iterator is going to produce any results. - protected boolean isOverlapping = true; + private boolean isOverlapping = true; public Statistics(IteratorType iteratorType) { @@ -217,15 +222,15 @@ public abstract class RangeIterator<K extends Comparable<K>, T extends CombinedV switch (iteratorType) { case UNION: - min = min == null || min.compareTo(range.getMinimum()) > 0 ? range.getMinimum() : min; - max = max == null || max.compareTo(range.getMaximum()) < 0 ? range.getMaximum() : max; + min = nullSafeMin(min, range.getMinimum()); + max = nullSafeMax(max, range.getMaximum()); break; case INTERSECTION: // minimum of the intersection is the biggest minimum of individual iterators - min = min == null || min.compareTo(range.getMinimum()) < 0 ? range.getMinimum() : min; + min = nullSafeMax(min, range.getMinimum()); // maximum of the intersection is the smallest maximum of individual iterators - max = max == null || max.compareTo(range.getMaximum()) > 0 ? range.getMaximum() : max; + max = nullSafeMin(max, range.getMaximum()); break; default: @@ -240,7 +245,6 @@ public abstract class RangeIterator<K extends Comparable<K>, T extends CombinedV maxRange = maxRange == null ? range : max(maxRange, range); tokenCount += range.getCount(); - } private RangeIterator<K, D> min(RangeIterator<K, D> a, RangeIterator<K, D> b) @@ -271,9 +275,46 @@ public abstract class RangeIterator<K extends Comparable<K>, T extends CombinedV return isOverlapping(a.getCurrent(), a.getMaximum(), b); } + /** + * Ranges are overlapping the following cases: + * + * * When they have a common subrange: + * + * min b.current max b.max + * +---------|--------------+------------| + * + * b.current min max b.max + * |--------------+---------+------------| + * + * min b.current b.max max + * +----------|-------------|------------+ + * + * + * If either range is empty, they're disjoint. + */ @VisibleForTesting protected static <K extends Comparable<K>, D extends CombinedValue<K>> boolean isOverlapping(K min, K max, RangeIterator<K, D> b) { - return min.compareTo(b.getMaximum()) <= 0 && b.getCurrent().compareTo(max) <= 0; + return (min != null && max != null) && + b.getCount() != 0 && + (min.compareTo(b.getMaximum()) <= 0 && b.getCurrent().compareTo(max) <= 0); + } + + @SuppressWarnings("unchecked") + private static <T extends Comparable> T nullSafeMin(T a, T b) + { + if (a == null) return b; + if (b == null) return a; + + return a.compareTo(b) > 0 ? b : a; + } + + @SuppressWarnings("unchecked") + private static <T extends Comparable> T nullSafeMax(T a, T b) + { + if (a == null) return b; + if (b == null) return a; + + return a.compareTo(b) > 0 ? a : b; } } http://git-wip-us.apache.org/repos/asf/cassandra/blob/2c111d15/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java b/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java index 4be460c..599de07 100644 --- a/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java +++ b/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java @@ -153,7 +153,14 @@ public class RangeUnionIterator<K extends Comparable<K>, D extends CombinedValue protected RangeIterator<K, D> buildIterator() { - return new RangeUnionIterator<>(statistics, ranges); + switch (rangeCount()) + { + case 1: + return ranges.poll(); + + default: + return new RangeUnionIterator<>(statistics, ranges); + } } } } http://git-wip-us.apache.org/repos/asf/cassandra/blob/2c111d15/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java ---------------------------------------------------------------------- diff --git a/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java b/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java index 399aa40..3bd27e6 100644 --- a/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java +++ b/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java @@ -2253,7 +2253,7 @@ public class SASIIndexTest IndexMemtable afterFlushMemtable = index.getCurrentMemtable(); Assert.assertNotSame(afterFlushMemtable, beforeFlushMemtable); - Assert.assertNull(afterFlushMemtable.search(expression)); + Assert.assertEquals(afterFlushMemtable.search(expression).getCount(), 0); Assert.assertEquals(0, index.getPendingMemtables().size()); loadData(new HashMap<String, Pair<String, Integer>>() @@ -2279,7 +2279,7 @@ public class SASIIndexTest index.discardMemtable(store.getTracker().getView().getCurrentMemtable()); Assert.assertEquals(0, index.getPendingMemtables().size()); - Assert.assertNull(index.searchMemtable(expression)); + Assert.assertEquals(index.searchMemtable(expression).getCount(), 0); // test discarding data from memtable loadData(new HashMap<String, Pair<String, Integer>>() @@ -2293,7 +2293,7 @@ public class SASIIndexTest Assert.assertTrue(index.searchMemtable(expression).getCount() > 0); index.switchMemtable(); - Assert.assertNull(index.searchMemtable(expression)); + Assert.assertEquals(index.searchMemtable(expression).getCount(), 0); } private static ColumnFamilyStore loadData(Map<String, Pair<String, Integer>> data, boolean forceFlush) http://git-wip-us.apache.org/repos/asf/cassandra/blob/2c111d15/test/unit/org/apache/cassandra/index/sasi/utils/LongIteratorTest.java ---------------------------------------------------------------------- diff --git a/test/unit/org/apache/cassandra/index/sasi/utils/LongIteratorTest.java b/test/unit/org/apache/cassandra/index/sasi/utils/LongIteratorTest.java new file mode 100644 index 0000000..05db33a --- /dev/null +++ b/test/unit/org/apache/cassandra/index/sasi/utils/LongIteratorTest.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.index.sasi.utils; + +import org.junit.Assert; +import org.junit.Test; + +import java.io.IOException; + +import static org.apache.cassandra.index.sasi.utils.LongIterator.convert; + +public class LongIteratorTest +{ + @Test + public void testEmptyIterator() throws IOException { + LongIterator it = new LongIterator(new long[] { }); + + Assert.assertEquals(0, it.getCount()); + Assert.assertEquals(null, it.getCurrent()); + Assert.assertEquals(null, it.getMaximum()); + Assert.assertEquals(null, it.getMinimum()); + Assert.assertFalse(it.hasNext()); + + it.close(); + } + + @Test + public void testBasicITerator() throws IOException { + LongIterator it = new LongIterator(new long[] { 2L, 3L, 5L, 6L }); + + Assert.assertEquals(4L, (long) it.getCount()); + Assert.assertEquals(2L, (long) it.getCurrent()); + Assert.assertEquals(6L, (long) it.getMaximum()); + Assert.assertEquals(2L, (long) it.getMinimum()); + + Assert.assertEquals(2L, (long) it.next().get()); + Assert.assertEquals(3L, (long) it.next().get()); + + it.close(); + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/cassandra/blob/2c111d15/test/unit/org/apache/cassandra/index/sasi/utils/RangeIntersectionIteratorTest.java ---------------------------------------------------------------------- diff --git a/test/unit/org/apache/cassandra/index/sasi/utils/RangeIntersectionIteratorTest.java b/test/unit/org/apache/cassandra/index/sasi/utils/RangeIntersectionIteratorTest.java index 18b9dd7..4dc9e3f 100644 --- a/test/unit/org/apache/cassandra/index/sasi/utils/RangeIntersectionIteratorTest.java +++ b/test/unit/org/apache/cassandra/index/sasi/utils/RangeIntersectionIteratorTest.java @@ -223,7 +223,7 @@ public class RangeIntersectionIteratorTest FileUtils.closeQuietly(tokens); RangeIterator emptyTokens = RangeIntersectionIterator.builder(strategy).build(); - Assert.assertNull(emptyTokens); + Assert.assertEquals(0, emptyTokens.getCount()); builder = RangeIntersectionIterator.builder(strategy); Assert.assertEquals(0L, builder.add((RangeIterator<Long, Token>) null).rangeCount()); @@ -236,6 +236,15 @@ public class RangeIntersectionIteratorTest // because build should return first element if it's only one instead of building yet another iterator Assert.assertEquals(range, single); + // Make a difference between empty and null ranges. + builder = RangeIntersectionIterator.builder(strategy); + builder.add(new LongIterator(new long[] {})); + Assert.assertEquals(0L, builder.rangeCount()); + builder.add(single); + Assert.assertEquals(1L, builder.rangeCount()); + range = builder.build(); + Assert.assertEquals(0, range.getCount()); + // disjoint case builder = RangeIntersectionIterator.builder(); builder.add(new LongIterator(new long[] { 1L, 2L, 3L })); @@ -250,6 +259,73 @@ public class RangeIntersectionIteratorTest } @Test + public void emptyRangeTest() { + RangeIterator.Builder<Long, Token> builder; + + // empty, then non-empty + builder = RangeIntersectionIterator.builder(); + builder.add(new LongIterator(new long[] {})); + builder.add(new LongIterator(new long[] {10})); + assertEmpty(builder.build()); + + builder = RangeIntersectionIterator.builder(); + builder.add(new LongIterator(new long[] {})); + for (int i = 0; i < 10; i++) + builder.add(new LongIterator(new long[] {0, i + 10})); + assertEmpty(builder.build()); + + // non-empty, then empty + builder = RangeIntersectionIterator.builder(); + builder.add(new LongIterator(new long[] {10})); + builder.add(new LongIterator(new long[] {})); + assertEmpty(builder.build()); + + builder = RangeIntersectionIterator.builder(); + for (int i = 0; i < 10; i++) + builder.add(new LongIterator(new long[] {0, i + 10})); + + builder.add(new LongIterator(new long[] {})); + assertEmpty(builder.build()); + + // empty, then non-empty then empty again + builder = RangeIntersectionIterator.builder(); + builder.add(new LongIterator(new long[] {})); + builder.add(new LongIterator(new long[] {0, 10})); + builder.add(new LongIterator(new long[] {})); + assertEmpty(builder.build()); + + builder = RangeIntersectionIterator.builder(); + builder.add(new LongIterator(new long[] {})); + for (int i = 0; i < 10; i++) + builder.add(new LongIterator(new long[] {0, i + 10})); + builder.add(new LongIterator(new long[] {})); + assertEmpty(builder.build()); + + // non-empty, empty, then non-empty again + builder = RangeIntersectionIterator.builder(); + builder.add(new LongIterator(new long[] {0, 10})); + builder.add(new LongIterator(new long[] {})); + builder.add(new LongIterator(new long[] {0, 10})); + assertEmpty(builder.build()); + + builder = RangeIntersectionIterator.builder(); + for (int i = 0; i < 5; i++) + builder.add(new LongIterator(new long[] {0, i + 10})); + builder.add(new LongIterator(new long[] {})); + for (int i = 5; i < 10; i++) + builder.add(new LongIterator(new long[] {0, i + 10})); + assertEmpty(builder.build()); + } + + public static void assertEmpty(RangeIterator<Long, Token> range) + { + Assert.assertNull(range.getMinimum()); + Assert.assertNull(range.getMaximum()); + Assert.assertFalse(range.hasNext()); + Assert.assertEquals(0, range.getCount()); + } + + @Test public void testClose() throws IOException { for (Strategy strategy : Strategy.values()) http://git-wip-us.apache.org/repos/asf/cassandra/blob/2c111d15/test/unit/org/apache/cassandra/index/sasi/utils/RangeUnionIteratorTest.java ---------------------------------------------------------------------- diff --git a/test/unit/org/apache/cassandra/index/sasi/utils/RangeUnionIteratorTest.java b/test/unit/org/apache/cassandra/index/sasi/utils/RangeUnionIteratorTest.java index f69086b..162b1c6 100644 --- a/test/unit/org/apache/cassandra/index/sasi/utils/RangeUnionIteratorTest.java +++ b/test/unit/org/apache/cassandra/index/sasi/utils/RangeUnionIteratorTest.java @@ -214,7 +214,7 @@ public class RangeUnionIteratorTest FileUtils.closeQuietly(tokens); RangeIterator emptyTokens = RangeUnionIterator.builder().build(); - Assert.assertNull(emptyTokens); + Assert.assertEquals(0, emptyTokens.getCount()); builder = RangeUnionIterator.builder(); Assert.assertEquals(0L, builder.add((RangeIterator<Long, Token>) null).rangeCount()); @@ -303,4 +303,74 @@ public class RangeUnionIteratorTest Assert.assertNull(empty.skipTo(3L)); Assert.assertFalse(empty.hasNext()); } + + @Test + public void emptyRangeTest() { + RangeIterator.Builder<Long, Token> builder; + RangeIterator<Long, Token> range; + // empty, then non-empty + builder = RangeUnionIterator.builder(); + builder.add(new LongIterator(new long[] {})); + for (int i = 0; i < 10; i++) + builder.add(new LongIterator(new long[] {i + 10})); + range = builder.build(); + Assert.assertEquals(Long.valueOf(10), range.getMinimum()); + Assert.assertEquals(Long.valueOf(19), range.getMaximum()); + Assert.assertTrue(range.hasNext()); + Assert.assertEquals(10, range.getCount()); + + builder = RangeUnionIterator.builder(); + builder.add(new LongIterator(new long[] {})); + builder.add(new LongIterator(new long[] {10})); + range = builder.build(); + Assert.assertEquals(Long.valueOf(10), range.getMinimum()); + Assert.assertEquals(Long.valueOf(10), range.getMaximum()); + Assert.assertTrue(range.hasNext()); + Assert.assertEquals(1, range.getCount()); + + // non-empty, then empty + builder = RangeUnionIterator.builder(); + for (int i = 0; i < 10; i++) + builder.add(new LongIterator(new long[] {i + 10})); + builder.add(new LongIterator(new long[] {})); + range = builder.build(); + Assert.assertEquals(Long.valueOf(10), range.getMinimum()); + Assert.assertEquals(Long.valueOf(19), range.getMaximum()); + Assert.assertTrue(range.hasNext()); + Assert.assertEquals(10, range.getCount()); + + builder = RangeUnionIterator.builder(); + builder.add(new LongIterator(new long[] {10})); + builder.add(new LongIterator(new long[] {})); + range = builder.build(); + Assert.assertEquals(Long.valueOf(10), range.getMinimum()); + Assert.assertEquals(Long.valueOf(10), range.getMaximum()); + Assert.assertTrue(range.hasNext()); + Assert.assertEquals(1, range.getCount()); + + // empty, then non-empty then empty again + builder = RangeUnionIterator.builder(); + builder.add(new LongIterator(new long[] {})); + for (int i = 0; i < 10; i++) + builder.add(new LongIterator(new long[] {i + 10})); + builder.add(new LongIterator(new long[] {})); + range = builder.build(); + Assert.assertEquals(Long.valueOf(10), range.getMinimum()); + Assert.assertEquals(Long.valueOf(19), range.getMaximum()); + Assert.assertTrue(range.hasNext()); + Assert.assertEquals(10, range.getCount()); + + // non-empty, empty, then non-empty again + builder = RangeUnionIterator.builder(); + for (int i = 0; i < 5; i++) + builder.add(new LongIterator(new long[] {i + 10})); + builder.add(new LongIterator(new long[] {})); + for (int i = 5; i < 10; i++) + builder.add(new LongIterator(new long[] {i + 10})); + range = builder.build(); + Assert.assertEquals(Long.valueOf(10), range.getMinimum()); + Assert.assertEquals(Long.valueOf(19), range.getMaximum()); + Assert.assertTrue(range.hasNext()); + Assert.assertEquals(10, range.getCount()); + } } \ No newline at end of file