Support DISTINCT for static columns This also fix the behaviour when DISTINCT is not used.
patch by slebresne; reviewed by thobbs for CASSANDRA-7305 Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/6e4dca02 Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/6e4dca02 Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/6e4dca02 Branch: refs/heads/trunk Commit: 6e4dca02a720ac9277370ba1d4cf387ef1a3cfd4 Parents: 77bbcc1 Author: Sylvain Lebresne <sylv...@datastax.com> Authored: Wed Jun 25 11:30:53 2014 +0200 Committer: Sylvain Lebresne <sylv...@datastax.com> Committed: Thu Jun 26 10:12:58 2014 +0200 ---------------------------------------------------------------------- CHANGES.txt | 2 + NEWS.txt | 9 ++- .../cql3/statements/SelectStatement.java | 72 +++++++++++--------- 3 files changed, 48 insertions(+), 35 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/6e4dca02/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index 84be96d..c3fe8d7 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -21,6 +21,8 @@ * Account for range tombstones in min/max column names (CASSANDRA-7235) * Improve sub range repair validation (CASSANDRA-7317) * Accept subtypes for function results, type casts (CASSANDRA-6766) + * Support DISTINCT for static columns and fix behaviour when DISTINC is + not use (CASSANDRA-7305). Merged from 1.2: * Expose global ColumnFamily metrics (CASSANDRA-7273) * cqlsh: Fix CompositeType columns in DESCRIBE TABLE output (CASSANDRA-7399) http://git-wip-us.apache.org/repos/asf/cassandra/blob/6e4dca02/NEWS.txt ---------------------------------------------------------------------- diff --git a/NEWS.txt b/NEWS.txt index 422330c..0fbc20f 100644 --- a/NEWS.txt +++ b/NEWS.txt @@ -16,11 +16,16 @@ using the provided 'sstableupgrade' tool. 2.0.9 ===== -Operations ----------- +Upgrading +--------- - Default values for read_repair_chance and local_read_repair_chance have been swapped. Namely, default read_repair_chance is now set to 0.0, and default local_read_repair_chance to 0.1. + - Queries selecting only CQL static columns were (mistakenly) not returning one + result per row in the partition. This has been fixed and a SELECT DISTINCT + can be used when only the static column of a partition needs to be fetch + without fetching the whole partition. But if you use static columns, please + make sure this won't affect you (see CASSANDRA-7305 for details). 2.0.8 http://git-wip-us.apache.org/repos/asf/cassandra/blob/6e4dca02/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java index 7a91517..f106402 100644 --- a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java @@ -427,12 +427,26 @@ public class SelectStatement implements CQLStatement, MeasurableForPreparedCache } } + private ColumnSlice makeStaticSlice() + { + ColumnNameBuilder staticPrefix = cfDef.cfm.getStaticColumnNameBuilder(); + // Note: we could use staticPrefix.build() for the start bound, but EMPTY_BYTE_BUFFER gives us the + // same effect while saving a few CPU cycles. + return isReversed + ? new ColumnSlice(staticPrefix.buildAsEndOfRange(), ByteBufferUtil.EMPTY_BYTE_BUFFER) + : new ColumnSlice(ByteBufferUtil.EMPTY_BYTE_BUFFER, staticPrefix.buildAsEndOfRange()); + } + private IDiskAtomFilter makeFilter(List<ByteBuffer> variables, int limit) throws InvalidRequestException { + int toGroup = cfDef.isCompact ? -1 : cfDef.clusteringColumnsCount(); if (parameters.isDistinct) { - return new SliceQueryFilter(ColumnSlice.ALL_COLUMNS_ARRAY, false, 1, -1); + // For distinct, we only care about fetching the beginning of each partition. If we don't have + // static columns, we in fact only care about the first cell, so we query only that (we don't "group"). + // If we do have static columns, we do need to fetch the first full group (to have the static columns values). + return new SliceQueryFilter(ColumnSlice.ALL_COLUMNS_ARRAY, false, 1, selectsStaticColumns ? toGroup : -1); } else if (isColumnRange()) { @@ -440,7 +454,6 @@ public class SelectStatement implements CQLStatement, MeasurableForPreparedCache // to account for the grouping of columns. // Since that doesn't work for maps/sets/lists, we now use the compositesToGroup option of SliceQueryFilter. // But we must preserve backward compatibility too (for mixed version cluster that is). - int toGroup = cfDef.isCompact ? -1 : cfDef.clusteringColumnsCount(); List<ByteBuffer> startBounds = getRequestedBound(Bound.START, variables); List<ByteBuffer> endBounds = getRequestedBound(Bound.END, variables); assert startBounds.size() == endBounds.size(); @@ -448,21 +461,9 @@ public class SelectStatement implements CQLStatement, MeasurableForPreparedCache // Handles fetching static columns. Note that for 2i, the filter is just used to restrict // the part of the index to query so adding the static slice would be useless and confusing. // For 2i, static columns are retrieve in CompositesSearcher with each index hit. - ColumnSlice staticSlice = null; - if (selectsStaticColumns && !usesSecondaryIndexing) - { - ColumnNameBuilder staticPrefix = cfDef.cfm.getStaticColumnNameBuilder(); - // Note: we could use staticPrefix.build() for the start bound, but EMPTY_BYTE_BUFFER gives us the - // same effect while saving a few CPU cycles. - staticSlice = isReversed - ? new ColumnSlice(staticPrefix.buildAsEndOfRange(), ByteBufferUtil.EMPTY_BYTE_BUFFER) - : new ColumnSlice(ByteBufferUtil.EMPTY_BYTE_BUFFER, staticPrefix.buildAsEndOfRange()); - - // In the case where we only select static columns, we want to really only check the static columns. - // So we return early as the rest of that method would actually make us query everything - if (selectsOnlyStaticColumns) - return sliceFilter(staticSlice, limit, toGroup); - } + ColumnSlice staticSlice = selectsStaticColumns && !usesSecondaryIndexing + ? makeStaticSlice() + : null; // The case where startBounds == 1 is common enough that it's worth optimizing if (startBounds.size() == 1) @@ -1088,7 +1089,7 @@ public class SelectStatement implements CQLStatement, MeasurableForPreparedCache ? ((CompositeType)cfDef.cfm.getKeyValidator()).split(key) : new ByteBuffer[]{ key }; - if (parameters.isDistinct) + if (parameters.isDistinct && !selectsStaticColumns) { if (!cf.hasOnlyTombstones(now)) { @@ -1331,6 +1332,23 @@ public class SelectStatement implements CQLStatement, MeasurableForPreparedCache return false; } + private void validateDistinctSelection() + throws InvalidRequestException + { + Collection<CFDefinition.Name> requestedColumns = selection.getColumns(); + for (CFDefinition.Name name : requestedColumns) + if (name.kind != CFDefinition.Name.Kind.KEY_ALIAS && name.kind != CFDefinition.Name.Kind.STATIC) + throw new InvalidRequestException(String.format("SELECT DISTINCT queries must only request partition key columns and/or static columns (not %s)", name)); + + // If it's a key range, we require that all partition key columns are selected so we don't have to bother with post-query grouping. + if (!isKeyRange) + return; + + for (CFDefinition.Name name : cfDef.partitionKeys()) + if (!requestedColumns.contains(name)) + throw new InvalidRequestException(String.format("SELECT DISTINCT queries must request all the partition key columns (missing %s)", name)); + } + public static class RawStatement extends CFStatement { private final Parameters parameters; @@ -1363,9 +1381,6 @@ public class SelectStatement implements CQLStatement, MeasurableForPreparedCache ? Selection.wildcard(cfDef) : Selection.fromSelectors(cfDef, selectClause); - if (parameters.isDistinct) - validateDistinctSelection(selection.getColumns(), cfDef.partitionKeys()); - SelectStatement stmt = new SelectStatement(cfDef, boundNames.size(), parameters, selection, prepareLimit(boundNames)); /* @@ -1440,6 +1455,9 @@ public class SelectStatement implements CQLStatement, MeasurableForPreparedCache checkNeedsFiltering(stmt); + if (parameters.isDistinct) + stmt.validateDistinctSelection(); + return new ParsedStatement.Prepared(stmt, boundNames); } @@ -1961,18 +1979,6 @@ public class SelectStatement implements CQLStatement, MeasurableForPreparedCache } } - private void validateDistinctSelection(Collection<CFDefinition.Name> requestedColumns, Collection<CFDefinition.Name> partitionKey) - throws InvalidRequestException - { - for (CFDefinition.Name name : requestedColumns) - if (!partitionKey.contains(name)) - throw new InvalidRequestException(String.format("SELECT DISTINCT queries must only request partition key columns (not %s)", name)); - - for (CFDefinition.Name name : partitionKey) - if (!requestedColumns.contains(name)) - throw new InvalidRequestException(String.format("SELECT DISTINCT queries must request all the partition key columns (missing %s)", name)); - } - private boolean containsAlias(final ColumnIdentifier name) { return Iterables.any(selectClause, new Predicate<RawSelector>()