This is an automated email from the ASF dual-hosted git repository.
maedhroz pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/cassandra.git
The following commit(s) were added to refs/heads/trunk by this push:
new cf806cac1a Support LIKE expressions in filtering queries
cf806cac1a is described below
commit cf806cac1aca7b56fe715bbd2641cda65beef91e
Author: Pranav Shenoy <[email protected]>
AuthorDate: Mon Sep 8 20:43:07 2025 -0700
Support LIKE expressions in filtering queries
patch by Pranav Shenoy; reviewed by Caleb Rackliffe and David Capwell for CASSANDRA-17198
---
CHANGES.txt | 1 +
src/java/org/apache/cassandra/cql3/Operator.java | 8 +--
src/java/org/apache/cassandra/cql3/Ordering.java | 3 +-
src/java/org/apache/cassandra/cql3/Relation.java | 6 +-
.../cql3/restrictions/SimpleRestriction.java | 17 +++--
.../cql3/restrictions/StatementRestrictions.java | 6 +-
.../cassandra/index/sai/plan/Expression.java | 43 +++++++++++-
.../ClusteringColumnRestrictionsTest.java | 10 +--
.../validation/entities/SecondaryIndexTest.java | 8 +--
.../index/sai/cql/AllowFilteringTest.java | 81 +++++++++++++++++++++-
.../index/sai/cql/UnindexedExpressionsTest.java | 2 +-
.../apache/cassandra/index/sasi/SASIIndexTest.java | 12 +---
12 files changed, 156 insertions(+), 41 deletions(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index e3fd000fbd..a4b6a99c21 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
5.1
+ * Support LIKE expressions in filtering queries (CASSANDRA-17198)
* Make legacy index rebuilds safe on Gossip -> TCM upgrades (CASSANDRA-20887)
* Minor improvements and hardening for IndexHints (CASSANDRA-20888)
* Stop repair scheduler if two major versions are detected (CASSANDRA-20048)
diff --git a/src/java/org/apache/cassandra/cql3/Operator.java
b/src/java/org/apache/cassandra/cql3/Operator.java
index 93a81fa2f4..428ab0f533 100644
--- a/src/java/org/apache/cassandra/cql3/Operator.java
+++ b/src/java/org/apache/cassandra/cql3/Operator.java
@@ -611,7 +611,7 @@ public enum Operator
public boolean isSatisfiedBy(AbstractType<?> type, ByteBuffer
leftOperand, ByteBuffer rightOperand)
{
- return leftOperand != null && ByteBufferUtil.contains(leftOperand,
rightOperand);
+ return leftOperand != null && leftOperand.equals(rightOperand);
}
},
LIKE(14)
@@ -621,12 +621,6 @@ public enum Operator
{
throw new UnsupportedOperationException();
}
-
- @Override
- public boolean requiresIndexing()
- {
- return true;
- }
},
ANN(15)
{
diff --git a/src/java/org/apache/cassandra/cql3/Ordering.java
b/src/java/org/apache/cassandra/cql3/Ordering.java
index f8485edfbd..a0d95a8903 100644
--- a/src/java/org/apache/cassandra/cql3/Ordering.java
+++ b/src/java/org/apache/cassandra/cql3/Ordering.java
@@ -105,7 +105,8 @@ public class Ordering
{
return new
SimpleRestriction(ColumnsExpression.singleColumn(columnMetadata, tableMetadata),
Operator.ANN,
- Terms.of(vectorValue));
+ Terms.of(vectorValue),
+ false);
}
}
diff --git a/src/java/org/apache/cassandra/cql3/Relation.java
b/src/java/org/apache/cassandra/cql3/Relation.java
index 31d6f771ce..9447f04552 100644
--- a/src/java/org/apache/cassandra/cql3/Relation.java
+++ b/src/java/org/apache/cassandra/cql3/Relation.java
@@ -187,7 +187,7 @@ public final class Relation
* @return the <code>Restriction</code> corresponding to this
<code>Relation</code>
* @throws InvalidRequestException if this <code>Relation</code> is not
valid
*/
- public SingleRestriction toRestriction(TableMetadata table,
VariableSpecifications boundNames)
+ public SingleRestriction toRestriction(TableMetadata table,
VariableSpecifications boundNames, boolean allowFiltering)
{
ColumnsExpression columnsExpression = rawExpressions.prepare(table);
@@ -217,9 +217,9 @@ public final class Relation
// An IN restriction with only one element is the same as an EQ
restriction
if (operator.isIN() && terms.containsSingleTerm())
- return new SimpleRestriction(columnsExpression, Operator.EQ,
terms);
+ return new SimpleRestriction(columnsExpression, Operator.EQ,
terms, allowFiltering);
- return new SimpleRestriction(columnsExpression, operator, terms);
+ return new SimpleRestriction(columnsExpression, operator, terms,
allowFiltering);
}
public ColumnIdentifier column()
diff --git
a/src/java/org/apache/cassandra/cql3/restrictions/SimpleRestriction.java
b/src/java/org/apache/cassandra/cql3/restrictions/SimpleRestriction.java
index 5c41ef36cc..f3bb3bf4ee 100644
--- a/src/java/org/apache/cassandra/cql3/restrictions/SimpleRestriction.java
+++ b/src/java/org/apache/cassandra/cql3/restrictions/SimpleRestriction.java
@@ -43,6 +43,7 @@ import org.apache.cassandra.utils.ByteBufferUtil;
import static
org.apache.cassandra.cql3.statements.RequestValidations.checkFalse;
import static
org.apache.cassandra.cql3.statements.RequestValidations.invalidRequest;
+import java.util.Optional;
/**
* A simple predicate on a columns expression (e.g. columnA = X).
@@ -64,11 +65,17 @@ public final class SimpleRestriction implements
SingleRestriction
*/
private final Terms values;
- public SimpleRestriction(ColumnsExpression columnsExpression, Operator
operator, Terms values)
+ /**
+ * Indicates if the query has allow filtering
+ */
+ private final boolean allowFiltering;
+
+ public SimpleRestriction(ColumnsExpression columnsExpression, Operator
operator, Terms values, boolean allowFiltering)
{
this.columnsExpression = columnsExpression;
this.operator = operator;
this.values = values;
+ this.allowFiltering = allowFiltering;
}
@Override
@@ -344,11 +351,11 @@ public final class SimpleRestriction implements
SingleRestriction
else if (operator == Operator.LIKE)
{
LikePattern pattern = LikePattern.parse(buffers.get(0));
- // there must be a suitable INDEX for LIKE_XXX expressions
+
RowFilter.SimpleExpression expression = filter.add(column,
pattern.kind().operator(), pattern.value());
- indexRegistry.getBestIndexFor(expression, indexHints)
- .orElseThrow(() -> invalidRequest("%s is only
supported on properly indexed columns",
-
expression));
+ Optional<Index> index =
indexRegistry.getBestIndexFor(expression, indexHints);
+ if(!index.isPresent() && !allowFiltering)
+ throw invalidRequest("%s is only supported on properly
indexed columns or with ALLOW FILTERING", expression);
}
else
{
diff --git
a/src/java/org/apache/cassandra/cql3/restrictions/StatementRestrictions.java
b/src/java/org/apache/cassandra/cql3/restrictions/StatementRestrictions.java
index 4f96da6c96..3b40d7e412 100644
--- a/src/java/org/apache/cassandra/cql3/restrictions/StatementRestrictions.java
+++ b/src/java/org/apache/cassandra/cql3/restrictions/StatementRestrictions.java
@@ -226,11 +226,11 @@ public final class StatementRestrictions
if (!forView)
throw new InvalidRequestException("Unsupported
restriction: " + relation);
- this.notNullColumns.addAll(relation.toRestriction(table,
boundNames).columns());
+ this.notNullColumns.addAll(relation.toRestriction(table,
boundNames, allowFiltering).columns());
}
else if (operator.requiresIndexing())
{
- Restriction restriction = relation.toRestriction(table,
boundNames);
+ Restriction restriction = relation.toRestriction(table,
boundNames, allowFiltering);
if (!type.allowUseOfSecondaryIndices() ||
!restriction.hasSupportingIndex(indexRegistry, indexHints))
throw invalidRequest("%s restriction is only supported on
properly " +
@@ -240,7 +240,7 @@ public final class StatementRestrictions
}
else
{
- addRestriction(relation.toRestriction(table, boundNames),
indexRegistry, indexHints);
+ addRestriction(relation.toRestriction(table, boundNames,
allowFiltering), indexRegistry, indexHints);
}
}
diff --git a/src/java/org/apache/cassandra/index/sai/plan/Expression.java
b/src/java/org/apache/cassandra/index/sai/plan/Expression.java
index 9823c3c072..a5cf07e35d 100644
--- a/src/java/org/apache/cassandra/index/sai/plan/Expression.java
+++ b/src/java/org/apache/cassandra/index/sai/plan/Expression.java
@@ -31,6 +31,7 @@ import org.apache.cassandra.db.marshal.ListType;
import org.apache.cassandra.index.sai.StorageAttachedIndex;
import org.apache.cassandra.index.sai.analyzer.AbstractAnalyzer;
import org.apache.cassandra.index.sai.utils.IndexTermType;
+import org.apache.cassandra.utils.ByteBufferUtil;
/**
* An {@link Expression} is an internal representation of an index query
operation. They are built from
@@ -79,7 +80,7 @@ public abstract class Expression
public enum IndexOperator
{
- EQ, RANGE, CONTAINS_KEY, CONTAINS_VALUE, ANN, IN;
+ EQ, RANGE, CONTAINS_KEY, CONTAINS_VALUE, ANN, IN, LIKE_PREFIX,
LIKE_SUFFIX, LIKE_MATCHES, LIKE_CONTAINS;
public static IndexOperator valueOf(Operator operator)
{
@@ -106,6 +107,15 @@ public abstract class Expression
case IN:
return IN;
+ case LIKE_PREFIX:
+ return LIKE_PREFIX;
+ case LIKE_SUFFIX:
+ return LIKE_SUFFIX;
+ case LIKE_CONTAINS:
+ return LIKE_CONTAINS;
+ case LIKE_MATCHES:
+ return LIKE_MATCHES;
+
default:
return null;
@@ -114,13 +124,18 @@ public abstract class Expression
public boolean isEquality()
{
- return this == EQ || this == CONTAINS_KEY || this ==
CONTAINS_VALUE || this == IN;
+ return this == EQ || this == CONTAINS_KEY || this ==
CONTAINS_VALUE || this == IN || isLikeVariant();
}
public boolean isEqualityOrRange()
{
return isEquality() || this == RANGE;
}
+
+ public boolean isLikeVariant()
+ {
+ return this == LIKE_SUFFIX || this == LIKE_PREFIX || this ==
LIKE_CONTAINS || this == LIKE_MATCHES;
+ }
}
public abstract boolean isNotIndexed();
@@ -172,6 +187,10 @@ public abstract class Expression
case EQ:
case CONTAINS:
case CONTAINS_KEY:
+ case LIKE_PREFIX:
+ case LIKE_SUFFIX:
+ case LIKE_MATCHES:
+ case LIKE_CONTAINS:
case IN:
lower = new Bound(value, indexTermType, true);
upper = lower;
@@ -354,6 +373,26 @@ public abstract class Expression
}
}
break;
+ case LIKE_PREFIX:
+ {
+ isMatch = ByteBufferUtil.startsWith(term, requestedValue);
+ break;
+ }
+ case LIKE_SUFFIX:
+ {
+ isMatch = ByteBufferUtil.endsWith(term, requestedValue);
+ break;
+ }
+ case LIKE_CONTAINS:
+ {
+ isMatch = ByteBufferUtil.contains(term, requestedValue);
+ break;
+ }
+ case LIKE_MATCHES:
+ {
+ isMatch = term.equals(requestedValue);
+ break;
+ }
}
return isMatch;
}
diff --git
a/test/unit/org/apache/cassandra/cql3/restrictions/ClusteringColumnRestrictionsTest.java
b/test/unit/org/apache/cassandra/cql3/restrictions/ClusteringColumnRestrictionsTest.java
index e7b836ca80..d1bc875681 100644
---
a/test/unit/org/apache/cassandra/cql3/restrictions/ClusteringColumnRestrictionsTest.java
+++
b/test/unit/org/apache/cassandra/cql3/restrictions/ClusteringColumnRestrictionsTest.java
@@ -1645,7 +1645,7 @@ public class ClusteringColumnRestrictionsTest
private static Restriction newSingleRestriction(TableMetadata
tableMetadata, int index, Operator operator, ByteBuffer... values)
{
ColumnMetadata column = getClusteringColumnDefinition(tableMetadata,
index);
- return new SimpleRestriction(ColumnsExpression.singleColumn(column,
tableMetadata), operator, toTerms(values));
+ return new SimpleRestriction(ColumnsExpression.singleColumn(column,
tableMetadata), operator, toTerms(values), false);
}
/**
@@ -1669,7 +1669,8 @@ public class ClusteringColumnRestrictionsTest
TupleType tupleType = new TupleType(types);
return new SimpleRestriction(ColumnsExpression.multiColumns(columns,
tableMetadata),
Operator.EQ,
- Terms.of(new
MultiElements.Value(tupleType, asList(values))));
+ Terms.of(new
MultiElements.Value(tupleType, asList(values))),
+ false);
}
/**
@@ -1700,7 +1701,7 @@ public class ClusteringColumnRestrictionsTest
{
terms.add(new MultiElements.Value(tupleType, values[i]));
}
- return new SimpleRestriction(ColumnsExpression.multiColumns(columns,
tableMetadata), Operator.IN, Terms.of(terms));
+ return new SimpleRestriction(ColumnsExpression.multiColumns(columns,
tableMetadata), Operator.IN, Terms.of(terms), false);
}
/**
@@ -1737,7 +1738,8 @@ public class ClusteringColumnRestrictionsTest
TupleType type = new TupleType(types);
return new SimpleRestriction(ColumnsExpression.multiColumns(columns,
tableMetadata),
operator,
- Terms.of(new MultiElements.Value(type,
asList(values))));
+ Terms.of(new MultiElements.Value(type,
asList(values))),
+ false);
}
/**
diff --git
a/test/unit/org/apache/cassandra/cql3/validation/entities/SecondaryIndexTest.java
b/test/unit/org/apache/cassandra/cql3/validation/entities/SecondaryIndexTest.java
index 6888ff3a93..e12682a63f 100644
---
a/test/unit/org/apache/cassandra/cql3/validation/entities/SecondaryIndexTest.java
+++
b/test/unit/org/apache/cassandra/cql3/validation/entities/SecondaryIndexTest.java
@@ -843,16 +843,16 @@ public class SecondaryIndexTest extends CQLTester
// LIKE is not supported on indexes of non-literal values
// this is rejected before binding, so the value isn't available in
the error message
- assertInvalidMessage("LIKE restriction is only supported on properly
indexed columns. v3 LIKE ? is not valid",
+
assertInvalidMessage(StatementRestrictions.REQUIRES_ALLOW_FILTERING_MESSAGE,
"SELECT * FROM %s WHERE v3 LIKE ?",
"%abc");
- assertInvalidMessage("LIKE restriction is only supported on properly
indexed columns. v3 LIKE ? is not valid",
+
assertInvalidMessage(StatementRestrictions.REQUIRES_ALLOW_FILTERING_MESSAGE,
"SELECT * FROM %s WHERE v3 LIKE ?",
"%abc%");
- assertInvalidMessage("LIKE restriction is only supported on properly
indexed columns. v3 LIKE ? is not valid",
+
assertInvalidMessage(StatementRestrictions.REQUIRES_ALLOW_FILTERING_MESSAGE,
"SELECT * FROM %s WHERE v3 LIKE ?",
"%abc%");
- assertInvalidMessage("LIKE restriction is only supported on properly
indexed columns. v3 LIKE ? is not valid",
+
assertInvalidMessage(StatementRestrictions.REQUIRES_ALLOW_FILTERING_MESSAGE,
"SELECT * FROM %s WHERE v3 LIKE ?",
"abc");
}
diff --git
a/test/unit/org/apache/cassandra/index/sai/cql/AllowFilteringTest.java
b/test/unit/org/apache/cassandra/index/sai/cql/AllowFilteringTest.java
index 471a976fb9..21bdfe9cf9 100644
--- a/test/unit/org/apache/cassandra/index/sai/cql/AllowFilteringTest.java
+++ b/test/unit/org/apache/cassandra/index/sai/cql/AllowFilteringTest.java
@@ -33,7 +33,7 @@ import org.apache.cassandra.service.StorageService;
import static java.lang.String.format;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.junit.Assert.assertNotNull;
-
+import org.apache.cassandra.exceptions.InvalidRequestException;
/**
* Tests that {@code ALLOW FILTERING} is required only if needed.
*/
@@ -433,4 +433,83 @@ public class AllowFilteringTest extends SAITester
execute("SELECT * FROM %s WHERE v=0");
execute("SELECT * FROM %s WHERE v=0 ALLOW FILTERING");
}
+
+ @Test
+ public void testAllowFilteringWithLikePrefixPostFiltering()
+ {
+ createTable("CREATE TABLE %S (k1 int, k2 text, k3 int, PRIMARY KEY
(k1))");
+ createIndex("CREATE INDEX ON %s(k3) USING 'sai'");
+
+ execute("insert into %s (k1, k2, k3) values (1, 'fo', 1)");
+ execute("insert into %s (k1, k2, k3) values (2, 'foo', 2)");
+ execute("insert into %s (k1, k2, k3) values (3, 'fo', 3)");
+ execute("insert into %s (k1, k2, k3) values (4, 'ba', 4)");
+ execute("insert into %s (k1, k2, k3) values (5, 'bar', 5)");
+
+ assertRowCount(execute("SELECT * FROM %s WHERE k3 > 0 AND k2 LIKE
'f%%' ALLOW FILTERING"), 3);
+ assertRowCount(execute("SELECT * FROM %s WHERE k3 > 0 AND k2 LIKE
'ca%%' ALLOW FILTERING"), 0);
+ assertThatThrownBy(() -> execute("SELECT * FROM %s WHERE k3 > 0 AND k2
LIKE 'f%%'"))
+
.hasMessage(StatementRestrictions.REQUIRES_ALLOW_FILTERING_MESSAGE)
+ .isInstanceOf(InvalidRequestException.class);
+ }
+
+ @Test
+ public void testAllowFilteringWithLikeSuffixPostFiltering()
+ {
+ createTable("CREATE TABLE %S (k1 int, k2 text, k3 int, PRIMARY KEY
(k1))");
+ createIndex("CREATE INDEX ON %s(k3) USING 'sai'");
+
+ execute("insert into %s (k1, k2, k3) values (1, 'fo', 1)");
+ execute("insert into %s (k1, k2, k3) values (2, 'foo', 2)");
+ execute("insert into %s (k1, k2, k3) values (3, 'fo', 3)");
+ execute("insert into %s (k1, k2, k3) values (4, 'ba', 4)");
+ execute("insert into %s (k1, k2, k3) values (5, 'bar', 5)");
+
+ assertRowCount(execute("SELECT * FROM %s WHERE k3 > 0 AND k2 LIKE
'%%o' ALLOW FILTERING"), 3);
+ assertRowCount(execute("SELECT * FROM %s WHERE k3 > 0 AND k2 LIKE
'%%c' ALLOW FILTERING"), 0);
+ assertThatThrownBy(() -> execute("SELECT * FROM %s WHERE k3 > 0 AND k2
LIKE '%%c'"))
+
.hasMessage(StatementRestrictions.REQUIRES_ALLOW_FILTERING_MESSAGE)
+ .isInstanceOf(InvalidRequestException.class);
+ }
+
+ @Test
+ public void testAllowFilteringWithLikeContainsPostFiltering()
+ {
+ createTable("CREATE TABLE %S (k1 int, k2 text, k3 int, PRIMARY KEY
(k1))");
+ createIndex("CREATE INDEX ON %s(k3) USING 'sai'");
+
+ execute("insert into %s (k1, k2, k3) values (1, 'fo', 1)");
+ execute("insert into %s (k1, k2, k3) values (2, 'foo', 2)");
+ execute("insert into %s (k1, k2, k3) values (3, 'fo', 3)");
+ execute("insert into %s (k1, k2, k3) values (4, 'ba', 4)");
+ execute("insert into %s (k1, k2, k3) values (5, 'bar', 5)");
+
+ assertRowCount(execute("SELECT * FROM %s WHERE k3 > 0 AND k2 LIKE
'%%ar%%' ALLOW FILTERING"), 1);
+ assertRowCount(execute("SELECT * FROM %s WHERE k3 > 0 AND k2 LIKE
'%%ca%%' ALLOW FILTERING"), 0);
+ assertThatThrownBy(() -> execute("SELECT * FROM %s WHERE k3 > 0 AND k2
LIKE '%%ca%%'"))
+
.hasMessage(StatementRestrictions.REQUIRES_ALLOW_FILTERING_MESSAGE)
+ .isInstanceOf(InvalidRequestException.class);
+ }
+
+ @Test
+ public void testAllowFilteringWithLikeMatchesPostFiltering()
+ {
+ createTable("CREATE TABLE %S (k1 int, k2 text, k3 int, PRIMARY KEY
(k1))");
+ createIndex("CREATE INDEX ON %s(k3) USING 'sai'");
+
+ execute("insert into %s (k1, k2, k3) values (1, 'fo', 1)");
+ execute("insert into %s (k1, k2, k3) values (2, 'foo', 2)");
+ execute("insert into %s (k1, k2, k3) values (3, 'fo', 3)");
+ execute("insert into %s (k1, k2, k3) values (4, 'ba', 4)");
+ execute("insert into %s (k1, k2, k3) values (5, 'bar', 5)");
+
+ assertRowCount(execute("SELECT * FROM %s WHERE k3 > 0 AND k2 LIKE
'foo' ALLOW FILTERING"), 1);
+ assertRowCount(execute("SELECT * FROM %s WHERE k3 > 0 AND k2 LIKE
'baar' ALLOW FILTERING"), 0);
+ assertThatThrownBy(() -> execute("SELECT * FROM %s WHERE k3 > 0 AND k2
LIKE 'baar'"))
+
.hasMessage(StatementRestrictions.REQUIRES_ALLOW_FILTERING_MESSAGE)
+ .isInstanceOf(InvalidRequestException.class);
+ }
+
+
+
}
diff --git
a/test/unit/org/apache/cassandra/index/sai/cql/UnindexedExpressionsTest.java
b/test/unit/org/apache/cassandra/index/sai/cql/UnindexedExpressionsTest.java
index 6818f0edc2..6b409497b9 100644
--- a/test/unit/org/apache/cassandra/index/sai/cql/UnindexedExpressionsTest.java
+++ b/test/unit/org/apache/cassandra/index/sai/cql/UnindexedExpressionsTest.java
@@ -63,7 +63,7 @@ public class UnindexedExpressionsTest extends SAITester
execute("INSERT INTO %s (pk, val1, val2) VALUES (4, 4, '44')");
// The LIKE operator is rejected because it needs to be handled by an
index
- assertInvalidMessage("LIKE restriction is only supported on properly
indexed columns",
+
assertInvalidMessage(StatementRestrictions.REQUIRES_ALLOW_FILTERING_MESSAGE,
"SELECT pk FROM %s WHERE val1 = 1 AND val2 like
'1%%'");
// The IS NOT operator is only valid on materialized views
diff --git a/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java
b/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java
index 464775c266..fd7757f845 100644
--- a/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java
+++ b/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java
@@ -1927,16 +1927,8 @@ public class SASIIndexTest
Assert.assertNotNull(results);
Assert.assertEquals(2, results.size());
- try
- {
- executeCQL(CLUSTERING_CF_NAME_1 ,"SELECT * FROM %s.%s WHERE
location LIKE '%%U' ALLOW FILTERING");
- Assert.fail();
- }
- catch (InvalidRequestException e)
- {
- Assert.assertTrue(e.getMessage().contains("only supported"));
- // expected
- }
+ results = executeCQL(CLUSTERING_CF_NAME_1 ,"SELECT * FROM %s.%s WHERE
location LIKE '%%U' ALLOW FILTERING");
+ Assert.assertNotNull(results);
try
{
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]