Treat IN values as a set instead of a list Patch by Tyler Hobbs; reviewed by Benjamin Lerer for CASSANDRA-12420
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/cdd535fc Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/cdd535fc Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/cdd535fc Branch: refs/heads/cassandra-2.2 Commit: cdd535fcac4ba79bb371e8373c6504d9e3978853 Parents: 6fb89b9 Author: Tyler Hobbs <tylerlho...@gmail.com> Authored: Tue Sep 27 11:51:41 2016 -0500 Committer: Tyler Hobbs <tylerlho...@gmail.com> Committed: Tue Sep 27 11:51:41 2016 -0500 ---------------------------------------------------------------------- CHANGES.txt | 3 ++ NEWS.txt | 7 +++ .../cql3/statements/SelectStatement.java | 45 +++++++++----------- .../entities/FrozenCollectionsTest.java | 8 ++-- .../validation/operations/SelectLimitTest.java | 2 +- .../SelectMultiColumnRelationTest.java | 6 +-- .../operations/SelectOrderByTest.java | 8 ++-- 7 files changed, 41 insertions(+), 38 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/cdd535fc/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index 1438e98..b778444 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,7 @@ 2.1.16 + * Avoid infinitely looping result set when paging SELECT queries with + an IN clause with duplicate keys by treating the IN values as a set instead + of a list (CASSANDRA-12420) * Add system property to set the max number of native transport requests in queue (CASSANDRA-11363) * Include column family parameter when -st and -et are provided (CASSANDRA-11866) * Fix queries with empty ByteBuffer values in clustering column restrictions (CASSANDRA-12127) http://git-wip-us.apache.org/repos/asf/cassandra/blob/cdd535fc/NEWS.txt ---------------------------------------------------------------------- diff --git a/NEWS.txt b/NEWS.txt index 6a70adc..2db34ed 100644 --- a/NEWS.txt +++ b/NEWS.txt @@ -18,6 +18,13 @@ using the provided 'sstableupgrade' tool. Upgrading --------- + - Duplicate partition keys in SELECT statement IN clauses will now be + filtered out, meaning that duplicate results will no longer be returned. + Futhermore, the partitions will be returned in the order of the sorted + partition keys instead of the order of the IN values; this matches the + behavior of Cassandra 2.2+. This was necessary to avoid an infinitely + looping result set when combined with paging under some circumstances. + See CASSANDRA-12420 for details. - The ReversedType behaviour has been corrected for clustering columns of BYTES type containing empty value. Scrub should be run on the existing SSTables containing a descending clustering column of BYTES type to correct http://git-wip-us.apache.org/repos/asf/cassandra/blob/cdd535fc/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java index 40f3f33..fe63b44 100644 --- a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java @@ -71,13 +71,6 @@ public class SelectStatement implements CQLStatement private static final int DEFAULT_COUNT_PAGE_SIZE = 10000; - /** - * In the current version a query containing duplicate values in an IN restriction on the partition key will - * cause the same record to be returned multiple time. This behavior will be changed in 3.0 but until then - * we will log a warning the first time this problem occurs. - */ - private static volatile boolean HAS_LOGGED_WARNING_FOR_IN_RESTRICTION_WITH_DUPLICATES; - private final int boundTerms; public final CFMetaData cfm; public final Parameters parameters; @@ -682,9 +675,9 @@ public class SelectStatement implements CQLStatement : limit; } - private Collection<ByteBuffer> getKeys(final QueryOptions options) throws InvalidRequestException + private NavigableSet<ByteBuffer> getKeys(final QueryOptions options) throws InvalidRequestException { - List<ByteBuffer> keys = new ArrayList<ByteBuffer>(); + TreeSet<ByteBuffer> sortedKeys = new TreeSet<>(cfm.getKeyValidator()); CBuilder builder = cfm.getKeyValidatorAsCType().builder(); for (ColumnDefinition def : cfm.partitionKeyColumns()) { @@ -695,18 +688,14 @@ public class SelectStatement implements CQLStatement if (builder.remainingCount() == 1) { - if (values.size() > 1 && !HAS_LOGGED_WARNING_FOR_IN_RESTRICTION_WITH_DUPLICATES && containsDuplicates(values)) - { - // This approach does not fully prevent race conditions but it is not a big deal. - HAS_LOGGED_WARNING_FOR_IN_RESTRICTION_WITH_DUPLICATES = true; - logger.warn("SELECT queries with IN restrictions on the partition key containing duplicate values will return duplicate rows."); - } - for (ByteBuffer val : values) { if (val == null) throw new InvalidRequestException(String.format("Invalid null value for partition key part %s", def.name)); - keys.add(builder.buildWith(val).toByteBuffer()); + + ByteBuffer keyBuffer = builder.buildWith(val).toByteBuffer(); + validateKey(keyBuffer); + sortedKeys.add(keyBuffer); } } else @@ -720,18 +709,22 @@ public class SelectStatement implements CQLStatement builder.add(val); } } - return keys; + return sortedKeys; } - /** - * Checks if the specified list contains duplicate values. - * - * @param values the values to check - * @return <code>true</code> if the specified list contains duplicate values, <code>false</code> otherwise. - */ - private static boolean containsDuplicates(List<ByteBuffer> values) + private void validateKey(ByteBuffer keyBuffer) throws InvalidRequestException { - return new HashSet<>(values).size() < values.size(); + if (keyBuffer == null || keyBuffer.remaining() == 0) + throw new InvalidRequestException("Key may not be empty"); + + try + { + cfm.getKeyValidator().validate(keyBuffer); + } + catch (MarshalException exc) + { + throw new InvalidRequestException("Partition key IN clause contained invalid value: " + exc); + } } private ByteBuffer getKeyBound(Bound b, QueryOptions options) throws InvalidRequestException http://git-wip-us.apache.org/repos/asf/cassandra/blob/cdd535fc/test/unit/org/apache/cassandra/cql3/validation/entities/FrozenCollectionsTest.java ---------------------------------------------------------------------- diff --git a/test/unit/org/apache/cassandra/cql3/validation/entities/FrozenCollectionsTest.java b/test/unit/org/apache/cassandra/cql3/validation/entities/FrozenCollectionsTest.java index beed560..fb50e83 100644 --- a/test/unit/org/apache/cassandra/cql3/validation/entities/FrozenCollectionsTest.java +++ b/test/unit/org/apache/cassandra/cql3/validation/entities/FrozenCollectionsTest.java @@ -85,8 +85,8 @@ public class FrozenCollectionsTest extends CQLTester ); assertRows(execute("SELECT * FROM %s WHERE k IN ?", list(set(4, 5, 6), set())), - row(set(4, 5, 6), 0), - row(set(), 0) + row(set(), 0), + row(set(4, 5, 6), 0) ); assertRows(execute("SELECT * FROM %s WHERE token(k) >= token(?)", set(4, 5, 6)), @@ -154,9 +154,9 @@ public class FrozenCollectionsTest extends CQLTester ); assertRows(execute("SELECT * FROM %s WHERE k IN ?", list(map(set(4, 5, 6), list(1, 2, 3)), map(), map(set(), list(1, 2, 3)))), - row(map(set(4, 5, 6), list(1, 2, 3)), 0), row(map(), 0), - row(map(set(), list(1, 2, 3)), 0) + row(map(set(), list(1, 2, 3)), 0), + row(map(set(4, 5, 6), list(1, 2, 3)), 0) ); assertRows(execute("SELECT * FROM %s WHERE token(k) >= token(?)", map(set(4, 5, 6), list(1, 2, 3))), http://git-wip-us.apache.org/repos/asf/cassandra/blob/cdd535fc/test/unit/org/apache/cassandra/cql3/validation/operations/SelectLimitTest.java ---------------------------------------------------------------------- diff --git a/test/unit/org/apache/cassandra/cql3/validation/operations/SelectLimitTest.java b/test/unit/org/apache/cassandra/cql3/validation/operations/SelectLimitTest.java index eb2be60..5cd24dd 100644 --- a/test/unit/org/apache/cassandra/cql3/validation/operations/SelectLimitTest.java +++ b/test/unit/org/apache/cassandra/cql3/validation/operations/SelectLimitTest.java @@ -72,7 +72,7 @@ public class SelectLimitTest extends CQLTester // Check that we do limit the output to 1 *and* that we respect query // order of keys (even though 48 is after 2) assertRows(execute("SELECT * FROM %s WHERE userid IN (48, 2) LIMIT 1"), - row(48, "http://foo.com", 42L)); + row(2, "http://foo.com", 42L)); } /** http://git-wip-us.apache.org/repos/asf/cassandra/blob/cdd535fc/test/unit/org/apache/cassandra/cql3/validation/operations/SelectMultiColumnRelationTest.java ---------------------------------------------------------------------- diff --git a/test/unit/org/apache/cassandra/cql3/validation/operations/SelectMultiColumnRelationTest.java b/test/unit/org/apache/cassandra/cql3/validation/operations/SelectMultiColumnRelationTest.java index 98dda26..5f82328 100644 --- a/test/unit/org/apache/cassandra/cql3/validation/operations/SelectMultiColumnRelationTest.java +++ b/test/unit/org/apache/cassandra/cql3/validation/operations/SelectMultiColumnRelationTest.java @@ -629,10 +629,10 @@ public class SelectMultiColumnRelationTest extends CQLTester // same query, but reversed order for the IN values assertRows(execute("SELECT * FROM %s WHERE a IN (?, ?) AND (b, c, d) IN (?, ?)", 1, 0, tuple(0, 1, 1), tuple(0, 1, 0)), - row(1, 0, 1, 0), - row(1, 0, 1, 1), row(0, 0, 1, 0), - row(0, 0, 1, 1) + row(0, 0, 1, 1), + row(1, 0, 1, 0), + row(1, 0, 1, 1) ); assertRows(execute("SELECT * FROM %s WHERE a IN (?, ?) and (b, c) IN ((?, ?))", 0, 1, 0, 1), http://git-wip-us.apache.org/repos/asf/cassandra/blob/cdd535fc/test/unit/org/apache/cassandra/cql3/validation/operations/SelectOrderByTest.java ---------------------------------------------------------------------- diff --git a/test/unit/org/apache/cassandra/cql3/validation/operations/SelectOrderByTest.java b/test/unit/org/apache/cassandra/cql3/validation/operations/SelectOrderByTest.java index cf923bc..2cc0d6c 100644 --- a/test/unit/org/apache/cassandra/cql3/validation/operations/SelectOrderByTest.java +++ b/test/unit/org/apache/cassandra/cql3/validation/operations/SelectOrderByTest.java @@ -517,12 +517,12 @@ public class SelectOrderByTest extends CQLTester row(2), row(0)); assertRows(execute("SELECT v FROM %s WHERE k IN (1, 0)"), - row(3), - row(4), - row(5), row(0), row(1), - row(2)); + row(2), + row(3), + row(4), + row(5)); assertRows(execute("SELECT v FROM %s WHERE k IN (1, 0) ORDER BY c1 ASC"), row(0),