This is an automated email from the ASF dual-hosted git repository.

mck pushed a commit to branch cassandra-5.0
in repository https://gitbox.apache.org/repos/asf/cassandra.git
The following commit(s) were added to refs/heads/cassandra-5.0 by this push: new cada1a13c8 Vector search should be able to restrict on clustering keys when filtering isn't required cada1a13c8 is described below commit cada1a13c8b77fc672d67d754912cf28a7120e3c Author: Mick Semb Wever <m...@apache.org> AuthorDate: Tue Apr 9 00:11:20 2024 +0200 Vector search should be able to restrict on clustering keys when filtering isn't required patch by Mick Semb Wever; reviewed by Caleb Rackliffe for CASSANDRA-19544 --- CHANGES.txt | 1 + .../cql3/restrictions/StatementRestrictions.java | 17 +++++++--- .../cassandra/cql3/statements/SelectStatement.java | 4 +-- .../CassandraXMLJUnitResultFormatter.java | 3 +- .../index/sai/cql/VectorInvalidQueryTest.java | 36 ++++++++++++++++++++++ 5 files changed, 53 insertions(+), 8 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index eaf27a314c..74d142089c 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 5.0-beta2 + * Vector search can restrict on clustering keys when filtering isn't required (CASSANDRA-19544) * Fix FBUtilities' parsing of gcp cos_containerd kernel versions (CASSANDRA-18594) * Clean up KeyRangeIterator classes (CASSANDRA-19428) * Warn clients about possible consistency violations for filtering queries against multiple mutable columns (CASSANDRA-19489) diff --git a/src/java/org/apache/cassandra/cql3/restrictions/StatementRestrictions.java b/src/java/org/apache/cassandra/cql3/restrictions/StatementRestrictions.java index 4f6b829191..d5b6a2a6fd 100644 --- a/src/java/org/apache/cassandra/cql3/restrictions/StatementRestrictions.java +++ b/src/java/org/apache/cassandra/cql3/restrictions/StatementRestrictions.java @@ -317,8 +317,15 @@ public final class StatementRestrictions var nonIndexedColumns = Stream.concat(nonAnnColumns.stream(), clusteringColumns.stream()) .filter(c -> indexRegistry.listIndexes().stream().noneMatch(i -> i.dependsOn(c))) .collect(Collectors.toList()); + if (!nonIndexedColumns.isEmpty()) - throw 
invalidRequest(StatementRestrictions.ANN_REQUIRES_INDEXED_FILTERING_MESSAGE); + { + // restrictions on non-clustering columns, or clusterings that still need filtering, are invalid + if (!clusteringColumns.containsAll(nonIndexedColumns) + || partitionKeyRestrictions.hasUnrestrictedPartitionKeyComponents(table) + || clusteringColumnsRestrictions.needFiltering()) + throw invalidRequest(StatementRestrictions.ANN_REQUIRES_INDEXED_FILTERING_MESSAGE); + } } } else @@ -467,7 +474,7 @@ public final class StatementRestrictions /** * This method determines whether a specified column is restricted on equality or something equivalent, like IN. - * It can be used in conjunction with the columns selected by a query to determine which of those columns is + * It can be used in conjunction with the columns selected by a query to determine which of those columns is * already bound by the client (and from its perspective, not retrieved by the database). * * @param column a column from the same table these restrictions are against @@ -779,8 +786,8 @@ public final class StatementRestrictions if (filterRestrictions.isEmpty()) return RowFilter.none(); - // If there is only one replica, we don't need reconciliation at any consistency level. - boolean needsReconciliation = !table.isVirtual() + // If there is only one replica, we don't need reconciliation at any consistency level. + boolean needsReconciliation = !table.isVirtual() && options.getConsistency().needsReconciliation() && Keyspace.open(table.keyspace).getReplicationStrategy().getReplicationFactor().allReplicas > 1; @@ -1041,7 +1048,7 @@ public final class StatementRestrictions // a full partition query, then we include that content. 
return queriesFullPartitions(); } - + @Override public String toString() { diff --git a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java index 5418159ad6..d8fa830b13 100644 --- a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java +++ b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java @@ -120,7 +120,7 @@ public class SelectStatement implements CQLStatement.SingleKeyspaceCqlStatement "/ LOCAL_ONE / NODE_LOCAL. Consistency level %s was requested. " + "Downgrading the consistency level to %s."; public static final String TOPK_PAGE_SIZE_WARNING = "Top-K queries do not support paging and the page size is set to %d, " + - "which is less than LIMIT %d. The page size has been set to %<d to match the LIMIT."; + "which is less than LIMIT %d. The page size has been set to %d to match the LIMIT."; public final VariableSpecifications bindVariables; public final TableMetadata table; @@ -325,7 +325,7 @@ public class SelectStatement implements CQLStatement.SingleKeyspaceCqlStatement pageSize = limit.count(); limit = getDataLimits(userLimit, userPerPartitionLimit, pageSize, aggregationSpec); options = QueryOptions.withPageSize(options, pageSize); - ClientWarn.instance.warn(String.format(TOPK_PAGE_SIZE_WARNING, oldPageSize, limit.count())); + ClientWarn.instance.warn(String.format(TOPK_PAGE_SIZE_WARNING, oldPageSize, limit.count(), pageSize)); } } diff --git a/test/unit/org/apache/cassandra/CassandraXMLJUnitResultFormatter.java b/test/unit/org/apache/cassandra/CassandraXMLJUnitResultFormatter.java index d59be7790c..7a9df05a48 100644 --- a/test/unit/org/apache/cassandra/CassandraXMLJUnitResultFormatter.java +++ b/test/unit/org/apache/cassandra/CassandraXMLJUnitResultFormatter.java @@ -219,7 +219,8 @@ public class CassandraXMLJUnitResultFormatter implements JUnitResultFormatter, X { // only include properties and system-out if there's failure/error 
rootElement.appendChild(propsElement); - rootElement.appendChild(systemOutputElement); + if (null != systemOutputElement) + rootElement.appendChild(systemOutputElement); } if (out != null) { Writer wri = null; diff --git a/test/unit/org/apache/cassandra/index/sai/cql/VectorInvalidQueryTest.java b/test/unit/org/apache/cassandra/index/sai/cql/VectorInvalidQueryTest.java index 1bfc3a1a57..e26f6d9f83 100644 --- a/test/unit/org/apache/cassandra/index/sai/cql/VectorInvalidQueryTest.java +++ b/test/unit/org/apache/cassandra/index/sai/cql/VectorInvalidQueryTest.java @@ -324,6 +324,42 @@ public class VectorInvalidQueryTest extends SAITester .isInstanceOf(InvalidRequestException.class).hasRootCauseMessage(StorageAttachedIndex.VECTOR_NON_FLOAT_ERROR); } + @Test + public void canOrderWithWhereOnPrimaryColumns() throws Throwable + { + createTable("CREATE TABLE %s (a int, b int, c int, d int, v vector<float, 2>, PRIMARY KEY ((a,b),c,d))"); + createIndex("CREATE CUSTOM INDEX ON %s(v) USING 'StorageAttachedIndex'"); + + execute("INSERT INTO %s (a, b, c, d, v) VALUES (1, 2, 1, 2, [6.0,1.0])"); + + ResultSet result = execute("SELECT * FROM %s WHERE a = 1 AND b = 2 ORDER BY v ANN OF [2.0,1.0] LIMIT 1", ConsistencyLevel.ONE); + assertEquals(1, result.size()); + result = execute("SELECT * FROM %s WHERE a = 1 AND b = 2 AND c = 1 ORDER BY v ANN OF [2.0,1.0] LIMIT 1", ConsistencyLevel.ONE); + assertEquals(1, result.size()); + result = execute("SELECT * FROM %s WHERE a = 1 AND b = 2 AND c = 1 AND d = 2 ORDER BY v ANN OF [2.0,1.0] LIMIT 1", ConsistencyLevel.ONE); + assertEquals(1, result.size()); + + assertThatThrownBy(() -> executeNet("SELECT * FROM %s WHERE a = 1 AND b = 2 AND d = 2 ORDER BY v ANN OF [2.0,1.0] LIMIT 1")) + .isInstanceOf(InvalidQueryException.class).hasMessage(StatementRestrictions.ANN_REQUIRES_INDEXED_FILTERING_MESSAGE); + + createIndex("CREATE CUSTOM INDEX c_idx ON %s(c) USING 'StorageAttachedIndex'"); + + assertThatThrownBy(() -> executeNet("SELECT * FROM %s WHERE a = 1 
AND b = 2 AND d = 2 ORDER BY v ANN OF [2.0,1.0] LIMIT 1")) + .isInstanceOf(InvalidQueryException.class).hasMessage(StatementRestrictions.ANN_REQUIRES_INDEXED_FILTERING_MESSAGE); + + dropIndex("DROP INDEX %s.c_idx"); + createIndex("CREATE CUSTOM INDEX ON %s(d) USING 'StorageAttachedIndex'"); + + result = execute("SELECT * FROM %s WHERE a = 1 AND b = 2 AND c = 1 ORDER BY v ANN OF [2.0,1.0] LIMIT 1", ConsistencyLevel.ONE); + assertEquals(1, result.size()); + result = execute("SELECT * FROM %s WHERE a = 1 AND b = 2 AND c = 1 AND d = 2 ORDER BY v ANN OF [2.0,1.0] LIMIT 1", ConsistencyLevel.ONE); + assertEquals(1, result.size()); + result = execute("SELECT * FROM %s WHERE a = 1 AND b = 2 AND d = 2 ORDER BY v ANN OF [2.0,1.0] LIMIT 1", ConsistencyLevel.ONE); + assertEquals(1, result.size()); + result = execute("SELECT * FROM %s WHERE a = 1 AND b = 2 AND c > 0 ORDER BY v ANN OF [2.0,1.0] LIMIT 1", ConsistencyLevel.ONE); + assertEquals(1, result.size()); + } + @Test public void canOnlyExecuteWithCorrectConsistencyLevel() { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org For additional commands, e-mail: commits-h...@cassandra.apache.org