Add LIKE support to CQL3. Patch by xedin; reviewed by beobal for CASSANDRA-11067.
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/39286654 Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/39286654 Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/39286654 Branch: refs/heads/trunk Commit: 392866546e6377aafd018c3fcb2e124674a318dc Parents: 64cfcf0 Author: Pavel Yaskevich <xe...@apache.org> Authored: Mon Jan 25 19:13:36 2016 -0800 Committer: Pavel Yaskevich <xe...@apache.org> Committed: Fri Feb 5 11:56:23 2016 -0800 ---------------------------------------------------------------------- CHANGES.txt | 1 + doc/SASI.md | 64 +-- src/java/org/apache/cassandra/cql3/Cql.g | 43 ++ .../cassandra/cql3/MultiColumnRelation.java | 6 + .../org/apache/cassandra/cql3/Operator.java | 31 ++ .../org/apache/cassandra/cql3/Relation.java | 15 + .../cassandra/cql3/SingleColumnRelation.java | 18 +- .../apache/cassandra/cql3/TokenRelation.java | 6 + .../cql3/restrictions/AbstractRestriction.java | 6 + .../ForwardingPrimaryKeyRestrictions.java | 6 + .../cql3/restrictions/Restriction.java | 1 + .../restrictions/SingleColumnRestriction.java | 78 ++++ .../restrictions/StatementRestrictions.java | 22 +- .../apache/cassandra/db/filter/RowFilter.java | 3 + .../apache/cassandra/index/sasi/SASIIndex.java | 21 +- .../cassandra/index/sasi/conf/ColumnIndex.java | 6 + .../cassandra/index/sasi/conf/IndexMode.java | 6 + .../index/sasi/conf/view/PrefixTermTree.java | 11 +- .../index/sasi/conf/view/RangeTermTree.java | 40 +- .../cassandra/index/sasi/conf/view/View.java | 34 +- .../cassandra/index/sasi/disk/OnDiskIndex.java | 16 + .../index/sasi/disk/OnDiskIndexBuilder.java | 17 +- .../index/sasi/memory/TrieMemIndex.java | 41 +- .../cassandra/index/sasi/plan/Expression.java | 65 ++- .../cassandra/index/sasi/plan/Operation.java | 32 +- .../apache/cassandra/utils/ByteBufferUtil.java | 32 ++ .../unit/org/apache/cassandra/SchemaLoader.java | 1 + .../cassandra/index/sasi/SASIIndexTest.java | 409 
+++++++++++-------- .../index/sasi/disk/OnDiskIndexTest.java | 13 +- .../index/sasi/plan/OperationTest.java | 4 +- .../cassandra/utils/ByteBufferUtilTest.java | 53 +++ 31 files changed, 855 insertions(+), 246 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/39286654/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index 3fcca42..e7cde67 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 3.4 + * Add LIKE support to CQL3 (CASSANDRA-11067) * Generic Java UDF types (CASSANDRA-10819) * cqlsh: Include sub-second precision in timestamps by default (CASSANDRA-10428) * Set javac encoding to utf-8 (CASSANDRA-11077) http://git-wip-us.apache.org/repos/asf/cassandra/blob/39286654/doc/SASI.md ---------------------------------------------------------------------- diff --git a/doc/SASI.md b/doc/SASI.md index 64573b8..90fcf65 100644 --- a/doc/SASI.md +++ b/doc/SASI.md @@ -124,18 +124,34 @@ cqlsh:demo> SELECT first_name, last_name, age, height, created_at FROM sasi; #### Equality & Prefix Queries -SASI supports simple queries already supported by CQL, however, for -text fields like `first_name` equals queries perform prefix searches --- this is similar to `first_name LIKE 'M*'` in SQL (excluding case -sensitivity, which is dependent on the index configuration). The -semantics of CQL's `=` were modified instead of making further -modifications of the grammar with the introduction of a `LIKE` -operator. Ideally, CQL would be modified to include such an operator, -supporting both prefix and suffix searches. +SASI supports all queries already supported by CQL, including LIKE statement +for PREFIX, CONTAINS and SUFFIX searches. ``` cqlsh:demo> SELECT first_name, last_name, age, height, created_at FROM sasi - ... WHERE first_name = 'M'; + ... 
WHERE first_name = 'Pavel'; + + first_name | last_name | age | height | created_at +-------------+-----------+-----+--------+--------------- + Pavel | Yaskevich | 27 | 181 | 1442959315018 + +(1 rows) +``` + +``` +cqlsh:demo> SELECT first_name, last_name, age, height, created_at FROM sasi + ... WHERE first_name = 'pavel'; + + first_name | last_name | age | height | created_at +-------------+-----------+-----+--------+--------------- + Pavel | Yaskevich | 27 | 181 | 1442959315018 + +(1 rows) +``` + +``` +cqlsh:demo> SELECT first_name, last_name, age, height, created_at FROM sasi + ... WHERE first_name LIKE 'M%'; first_name | last_name | age | height | created_at ------------+-----------+-----+--------+--------------- @@ -150,7 +166,7 @@ column because of the options provided at index creation time. ``` cqlsh:demo> SELECT first_name, last_name, age, height, created_at FROM sasi - ... WHERE first_name = 'm'; + ... WHERE first_name LIKE 'm%'; first_name | last_name | age | height | created_at ------------+-----------+-----+--------+--------------- @@ -175,7 +191,7 @@ section. ``` cqlsh:demo> SELECT first_name, last_name, age, height, created_at FROM sasi - ... WHERE first_name = 'M' and age < 30 ALLOW FILTERING; + ... WHERE first_name LIKE 'M%' and age < 30 ALLOW FILTERING; first_name | last_name | age | height | created_at ------------+-----------+-----+--------+--------------- @@ -192,7 +208,7 @@ containing the search string as a sub-string. In this case the strings containing "a" or "an". 
``` -cqlsh:demo> SELECT * FROM sasi WHERE last_name = 'a'; +cqlsh:demo> SELECT * FROM sasi WHERE last_name LIKE '%a%'; id | age | created_at | first_name | height | last_name --------------------------------------+-----+---------------+------------+--------+--------------- @@ -204,7 +220,7 @@ cqlsh:demo> SELECT * FROM sasi WHERE last_name = 'a'; (5 rows) -cqlsh:demo> SELECT * FROM sasi WHERE last_name = 'an'; +cqlsh:demo> SELECT * FROM sasi WHERE last_name LIKE '%an%'; id | age | created_at | first_name | height | last_name --------------------------------------+-----+---------------+------------+--------+----------- @@ -220,7 +236,7 @@ SASI also supports filtering on non-indexed columns like `height`. The expression can only narrow down an existing query using `AND`. ``` -cqlsh:demo> SELECT * FROM sasi WHERE last_name = 'a' AND height >= 175 ALLOW FILTERING; +cqlsh:demo> SELECT * FROM sasi WHERE last_name LIKE '%a%' AND height >= 175 ALLOW FILTERING; id | age | created_at | first_name | height | last_name --------------------------------------+-----+---------------+------------+--------+--------------- @@ -274,7 +290,7 @@ property but for the query demonstrates the stemming applied by [`StandardAnalyzer`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java). 
``` -cqlsh:demo> SELECT * FROM sasi WHERE bio = 'distributing'; +cqlsh:demo> SELECT * FROM sasi WHERE bio LIKE 'distributing'; id | age | bio | created_at | first_name | height | last_name --------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+----------- @@ -283,7 +299,7 @@ cqlsh:demo> SELECT * FROM sasi WHERE bio = 'distributing'; (2 rows) -cqlsh:demo> SELECT * FROM sasi WHERE bio = 'they argued'; +cqlsh:demo> SELECT * FROM sasi WHERE bio LIKE 'they argued'; id | age | bio | created_at | first_name | height | last_name --------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+----------- @@ -292,7 +308,7 @@ cqlsh:demo> SELECT * FROM sasi WHERE bio = 'they argued'; (2 rows) -cqlsh:demo> SELECT * FROM sasi WHERE bio = 'working at the company'; +cqlsh:demo> SELECT * FROM sasi WHERE bio LIKE 'working at the company'; id | age | bio | created_at | first_name | height | last_name --------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+----------- @@ -300,7 +316,7 @@ cqlsh:demo> SELECT * FROM sasi WHERE bio = 'working at the company'; (1 rows) -cqlsh:demo> SELECT * FROM sasi WHERE bio = 'soft eng'; +cqlsh:demo> SELECT * FROM sasi WHERE bio LIKE 'soft eng'; id | age | bio | created_at | first_name | height | last_name --------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+----------- @@ -709,7 +725,7 @@ the `LookupIntersectionIterator`, otherwise the ### The SASIIndex Class The above components are glued together by the -[`SASIIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasiIndex.java) 
+[`SASIIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/SASIIndex.java) class which implements `Index`, and is instantiated per-table containing SASI indexes. It manages all indexes for a table via the @@ -718,11 +734,11 @@ and [`sasi.conf.view.View`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/conf/view/View.java) components, controls writing of all indexes for an SSTable via its [`PerSSTableIndexWriter`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/PerSSTableIndexWriter.java), and initiates searches with -`Indexer`. These classes glue the previously +`Searcher`. These classes glue the previously mentioned indexing components together with Cassandra's SSTable life-cycle ensuring indexes are not only written when Memtable's flush but also as SSTable's are compacted. For querying, the -`Indexer` does little but defer to +`Searcher` does little but defer to [`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java) and update e.g. latency metrics exposed by SASI. @@ -750,12 +766,6 @@ available in this repository or are not currently implemented. `LongToken`s, e.g. `Murmur3Partitioner`. Other existing partitioners which don't produce LongToken e.g. `ByteOrderedPartitioner` and `RandomPartitioner` will not work with SASI. -* `ALLOW FILTERING`, the requirement of at least one indexes `=` - expression, and lack of `LIKE` limit SASIs - feature-set. Modifications to the grammar to allow `Index` - implementations to enumerate its supported features would allow SASI - to expose more features without need to support them in other - implementations. * Not Equals and OR support have been removed in this release while changes are made to Cassandra itself to support them. 
http://git-wip-us.apache.org/repos/asf/cassandra/blob/39286654/src/java/org/apache/cassandra/cql3/Cql.g ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/cql3/Cql.g b/src/java/org/apache/cassandra/cql3/Cql.g index 6b4eff6..d560119 100644 --- a/src/java/org/apache/cassandra/cql3/Cql.g +++ b/src/java/org/apache/cassandra/cql3/Cql.g @@ -182,6 +182,46 @@ options { return filtered; } + + public void buildLIKERelation(WhereClause.Builder whereClause, ColumnIdentifier.Raw name, String likeValue) + { + Operator operator; + int beginIndex = 0; + int endIndex = likeValue.length() - 1; + + if (likeValue.charAt(endIndex) == '\%') + { + if (likeValue.charAt(beginIndex) == '\%') + { + operator = Operator.LIKE_CONTAINS; + beginIndex =+ 1; + } + else + { + operator = Operator.LIKE_PREFIX; + } + } + else if (likeValue.charAt(beginIndex) == '\%') + { + operator = Operator.LIKE_SUFFIX; + beginIndex += 1; + endIndex += 1; + } + else + { + operator = Operator.EQ; + endIndex += 1; + } + + if (endIndex == 0 || beginIndex == endIndex) + { + addRecognitionError("LIKE value can't be empty."); + return; + } + + String value = likeValue.substring(beginIndex, endIndex); + whereClause.add(new SingleColumnRelation(name, operator, Constants.Literal.string(value))); + } } @lexer::header { @@ -1423,6 +1463,7 @@ relationType returns [Operator op] relation[WhereClause.Builder clauses] : name=cident type=relationType t=term { $clauses.add(new SingleColumnRelation(name, type, t)); } + | name=cident K_LIKE v=STRING_LITERAL { buildLIKERelation($clauses, name, $v.text); } | name=cident K_IS K_NOT K_NULL { $clauses.add(new SingleColumnRelation(name, Operator.IS_NOT, Constants.NULL_LITERAL)); } | K_TOKEN l=tupleOfIdentifiers type=relationType t=term { $clauses.add(new TokenRelation(l, type, t)); } @@ -1628,6 +1669,7 @@ basic_unreserved_keyword returns [String str] | K_JSON | K_CALLED | K_INPUT + | K_LIKE ) { $str = $k.text; } ; @@ -1767,6 
+1809,7 @@ K_OR: O R; K_REPLACE: R E P L A C E; K_JSON: J S O N; +K_LIKE: L I K E; // Case-insensitive alpha characters fragment A: ('a'|'A'); http://git-wip-us.apache.org/repos/asf/cassandra/blob/39286654/src/java/org/apache/cassandra/cql3/MultiColumnRelation.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/cql3/MultiColumnRelation.java b/src/java/org/apache/cassandra/cql3/MultiColumnRelation.java index 143106d..2c1f97b 100644 --- a/src/java/org/apache/cassandra/cql3/MultiColumnRelation.java +++ b/src/java/org/apache/cassandra/cql3/MultiColumnRelation.java @@ -183,6 +183,12 @@ public class MultiColumnRelation extends Relation } @Override + protected Restriction newLikeRestriction(CFMetaData cfm, VariableSpecifications boundNames, Operator operator) throws InvalidRequestException + { + throw invalidRequest("%s cannot be used for multi-column relations", operator()); + } + + @Override protected Term toTerm(List<? extends ColumnSpecification> receivers, Raw raw, String keyspace, http://git-wip-us.apache.org/repos/asf/cassandra/blob/39286654/src/java/org/apache/cassandra/cql3/Operator.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/cql3/Operator.java b/src/java/org/apache/cassandra/cql3/Operator.java index 7b28a30..d518961 100644 --- a/src/java/org/apache/cassandra/cql3/Operator.java +++ b/src/java/org/apache/cassandra/cql3/Operator.java @@ -29,6 +29,7 @@ import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.ListType; import org.apache.cassandra.db.marshal.MapType; import org.apache.cassandra.db.marshal.SetType; +import org.apache.cassandra.utils.ByteBufferUtil; public enum Operator { @@ -101,6 +102,30 @@ public enum Operator { return "IS NOT"; } + }, + LIKE_PREFIX(10) + { + @Override + public String toString() + { + return "LIKE '<term>%'"; + } + }, + LIKE_SUFFIX(11) + { + @Override + public 
String toString() + { + return "LIKE '%<term>'"; + } + }, + LIKE_CONTAINS(12) + { + @Override + public String toString() + { + return "LIKE '%<term>%'"; + } }; /** @@ -193,6 +218,12 @@ public enum Operator case CONTAINS_KEY: Map map = (Map) type.getSerializer().deserialize(leftOperand); return map.containsKey(((MapType) type).getKeysType().getSerializer().deserialize(rightOperand)); + case LIKE_PREFIX: + return ByteBufferUtil.startsWith(leftOperand, rightOperand); + case LIKE_SUFFIX: + return ByteBufferUtil.endsWith(leftOperand, rightOperand); + case LIKE_CONTAINS: + return ByteBufferUtil.contains(leftOperand, rightOperand); default: // we shouldn't get CONTAINS, CONTAINS KEY, or IS NOT here throw new AssertionError(); http://git-wip-us.apache.org/repos/asf/cassandra/blob/39286654/src/java/org/apache/cassandra/cql3/Relation.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/cql3/Relation.java b/src/java/org/apache/cassandra/cql3/Relation.java index 334464f..81f46a6 100644 --- a/src/java/org/apache/cassandra/cql3/Relation.java +++ b/src/java/org/apache/cassandra/cql3/Relation.java @@ -108,6 +108,13 @@ public abstract class Relation { return relationType == Operator.EQ; } + public final boolean isLIKE() + { + return relationType == Operator.LIKE_PREFIX + || relationType == Operator.LIKE_SUFFIX + || relationType == Operator.LIKE_CONTAINS; + } + /** * Checks if the operator of this relation is a <code>Slice</code> (GT, GTE, LTE, LT). 
* @@ -143,6 +150,10 @@ public abstract class Relation { case CONTAINS: return newContainsRestriction(cfm, boundNames, false); case CONTAINS_KEY: return newContainsRestriction(cfm, boundNames, true); case IS_NOT: return newIsNotRestriction(cfm, boundNames); + case LIKE_PREFIX: + case LIKE_SUFFIX: + case LIKE_CONTAINS: + return newLikeRestriction(cfm, boundNames, relationType); default: throw invalidRequest("Unsupported \"!=\" relation: %s", this); } } @@ -200,6 +211,10 @@ public abstract class Relation { protected abstract Restriction newIsNotRestriction(CFMetaData cfm, VariableSpecifications boundNames) throws InvalidRequestException; + protected abstract Restriction newLikeRestriction(CFMetaData cfm, + VariableSpecifications boundNames, + Operator operator) throws InvalidRequestException; + /** * Converts the specified <code>Raw</code> into a <code>Term</code>. * @param receivers the columns to which the values must be associated at http://git-wip-us.apache.org/repos/asf/cassandra/blob/39286654/src/java/org/apache/cassandra/cql3/SingleColumnRelation.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/cql3/SingleColumnRelation.java b/src/java/org/apache/cassandra/cql3/SingleColumnRelation.java index 867a919..9053514 100644 --- a/src/java/org/apache/cassandra/cql3/SingleColumnRelation.java +++ b/src/java/org/apache/cassandra/cql3/SingleColumnRelation.java @@ -34,6 +34,7 @@ import org.apache.cassandra.exceptions.InvalidRequestException; import static org.apache.cassandra.cql3.statements.RequestValidations.checkFalse; import static org.apache.cassandra.cql3.statements.RequestValidations.checkTrue; +import static org.apache.cassandra.cql3.statements.RequestValidations.invalidRequest; /** * Relations encapsulate the relationship between an entity of some kind, and @@ -150,6 +151,9 @@ public final class SingleColumnRelation extends Relation if (isIN()) return String.format("%s IN %s", entityAsString, 
inValues); + if (isLIKE()) + return String.format("%s %s", entityAsString, relationType); + return String.format("%s %s %s", entityAsString, relationType, value); } @@ -215,6 +219,18 @@ public final class SingleColumnRelation extends Relation return new SingleColumnRestriction.IsNotNullRestriction(columnDef); } + @Override + protected Restriction newLikeRestriction(CFMetaData cfm, VariableSpecifications boundNames, Operator operator) throws InvalidRequestException + { + if (mapKey != null) + throw invalidRequest("%s can't be used with collections.", operator()); + + ColumnDefinition columnDef = toColumnDefinition(cfm, entity); + Term term = toTerm(toReceivers(columnDef, cfm.isDense()), value, cfm.ksName, boundNames); + + return new SingleColumnRestriction.LikeRestriction(columnDef, operator, term); + } + /** * Returns the receivers for this relation. * @param columnDef the column definition @@ -305,6 +321,6 @@ public final class SingleColumnRelation extends Relation private boolean canHaveOnlyOneValue() { - return isEQ() || (isIN() && inValues != null && inValues.size() == 1); + return isEQ() || isLIKE() || (isIN() && inValues != null && inValues.size() == 1); } } http://git-wip-us.apache.org/repos/asf/cassandra/blob/39286654/src/java/org/apache/cassandra/cql3/TokenRelation.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/cql3/TokenRelation.java b/src/java/org/apache/cassandra/cql3/TokenRelation.java index 2c13b19..8b8b9ce 100644 --- a/src/java/org/apache/cassandra/cql3/TokenRelation.java +++ b/src/java/org/apache/cassandra/cql3/TokenRelation.java @@ -112,6 +112,12 @@ public final class TokenRelation extends Relation } @Override + protected Restriction newLikeRestriction(CFMetaData cfm, VariableSpecifications boundNames, Operator operator) throws InvalidRequestException + { + throw invalidRequest("%s cannot be used with the token function", operator); + } + + @Override protected Term 
toTerm(List<? extends ColumnSpecification> receivers, Raw raw, String keyspace, http://git-wip-us.apache.org/repos/asf/cassandra/blob/39286654/src/java/org/apache/cassandra/cql3/restrictions/AbstractRestriction.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/cql3/restrictions/AbstractRestriction.java b/src/java/org/apache/cassandra/cql3/restrictions/AbstractRestriction.java index 023c2ac..932d3ae 100644 --- a/src/java/org/apache/cassandra/cql3/restrictions/AbstractRestriction.java +++ b/src/java/org/apache/cassandra/cql3/restrictions/AbstractRestriction.java @@ -51,6 +51,12 @@ abstract class AbstractRestriction implements Restriction } @Override + public boolean isLIKE() + { + return false; + } + + @Override public boolean isIN() { return false; http://git-wip-us.apache.org/repos/asf/cassandra/blob/39286654/src/java/org/apache/cassandra/cql3/restrictions/ForwardingPrimaryKeyRestrictions.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/cql3/restrictions/ForwardingPrimaryKeyRestrictions.java b/src/java/org/apache/cassandra/cql3/restrictions/ForwardingPrimaryKeyRestrictions.java index 18e7105..4afac42 100644 --- a/src/java/org/apache/cassandra/cql3/restrictions/ForwardingPrimaryKeyRestrictions.java +++ b/src/java/org/apache/cassandra/cql3/restrictions/ForwardingPrimaryKeyRestrictions.java @@ -155,6 +155,12 @@ abstract class ForwardingPrimaryKeyRestrictions implements PrimaryKeyRestriction } @Override + public boolean isLIKE() + { + return getDelegate().isLIKE(); + } + + @Override public boolean isIN() { return getDelegate().isIN(); http://git-wip-us.apache.org/repos/asf/cassandra/blob/39286654/src/java/org/apache/cassandra/cql3/restrictions/Restriction.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/cql3/restrictions/Restriction.java 
b/src/java/org/apache/cassandra/cql3/restrictions/Restriction.java index a84ebc4..2f066e0 100644 --- a/src/java/org/apache/cassandra/cql3/restrictions/Restriction.java +++ b/src/java/org/apache/cassandra/cql3/restrictions/Restriction.java @@ -39,6 +39,7 @@ public interface Restriction public boolean isOnToken(); public boolean isSlice(); public boolean isEQ(); + public boolean isLIKE(); public boolean isIN(); public boolean isContains(); public boolean isNotNull(); http://git-wip-us.apache.org/repos/asf/cassandra/blob/39286654/src/java/org/apache/cassandra/cql3/restrictions/SingleColumnRestriction.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/cql3/restrictions/SingleColumnRestriction.java b/src/java/org/apache/cassandra/cql3/restrictions/SingleColumnRestriction.java index d851253..318c49b 100644 --- a/src/java/org/apache/cassandra/cql3/restrictions/SingleColumnRestriction.java +++ b/src/java/org/apache/cassandra/cql3/restrictions/SingleColumnRestriction.java @@ -645,4 +645,82 @@ public abstract class SingleColumnRestriction extends AbstractRestriction return index.supportsExpression(columnDef, Operator.IS_NOT); } } + + public static final class LikeRestriction extends SingleColumnRestriction + { + private final Operator operator; + private final Term value; + + public LikeRestriction(ColumnDefinition columnDef, Operator operator, Term value) + { + super(columnDef); + this.operator = operator; + this.value = value; + } + + @Override + public Iterable<Function> getFunctions() + { + return value.getFunctions(); + } + + @Override + public boolean isEQ() + { + return false; + } + + @Override + public boolean isLIKE() + { + return true; + } + + @Override + public boolean canBeConvertedToMultiColumnRestriction() + { + return false; + } + + @Override + MultiColumnRestriction toMultiColumnRestriction() + { + throw new UnsupportedOperationException(); + } + + @Override + public void 
addRowFilterTo(RowFilter filter, + SecondaryIndexManager indexManager, + QueryOptions options) + { + filter.add(columnDef, operator, value.bindAndGet(options)); + } + + @Override + public MultiCBuilder appendTo(MultiCBuilder builder, QueryOptions options) + { + // LIKE could be used with clustering columns as soon as they are indexed, + // but we have to hide such expression from clustering filter since it + // can only filter based on the complete values. + return builder; + } + + @Override + public String toString() + { + return operator.toString(); + } + + @Override + public Restriction doMergeWith(Restriction otherRestriction) throws InvalidRequestException + { + throw invalidRequest("%s cannot be restricted by more than one relation if it includes a %s", columnDef.name, operator); + } + + @Override + protected boolean isSupportedBy(Index index) + { + return index.supportsExpression(columnDef, operator); + } + } } http://git-wip-us.apache.org/repos/asf/cassandra/blob/39286654/src/java/org/apache/cassandra/cql3/restrictions/StatementRestrictions.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/cql3/restrictions/StatementRestrictions.java b/src/java/org/apache/cassandra/cql3/restrictions/StatementRestrictions.java index fee17c2..ccd4941 100644 --- a/src/java/org/apache/cassandra/cql3/restrictions/StatementRestrictions.java +++ b/src/java/org/apache/cassandra/cql3/restrictions/StatementRestrictions.java @@ -134,6 +134,16 @@ public final class StatementRestrictions { this(type, cfm); + + ColumnFamilyStore cfs; + SecondaryIndexManager secondaryIndexManager = null; + + if (type.allowUseOfSecondaryIndices()) + { + cfs = Keyspace.open(cfm.ksName).getColumnFamilyStore(cfm.cfName); + secondaryIndexManager = cfs.indexManager; + } + /* * WHERE clause. 
For a given entity, rules are: * - EQ relation conflicts with anything else (including a 2nd EQ) @@ -153,6 +163,15 @@ public final class StatementRestrictions for (ColumnDefinition def : relation.toRestriction(cfm, boundNames).getColumnDefs()) this.notNullColumns.add(def); } + else if (relation.isLIKE()) + { + Restriction restriction = relation.toRestriction(cfm, boundNames); + + if (!type.allowUseOfSecondaryIndices() || !restriction.hasSupportingIndex(secondaryIndexManager)) + throw new InvalidRequestException(relation + " restriction is only supported on properly indexed columns"); + + addRestriction(restriction); + } else { addRestriction(relation.toRestriction(cfm, boundNames)); @@ -164,9 +183,6 @@ public final class StatementRestrictions if (type.allowUseOfSecondaryIndices()) { - ColumnFamilyStore cfs = Keyspace.open(cfm.ksName).getColumnFamilyStore(cfm.cfName); - SecondaryIndexManager secondaryIndexManager = cfs.indexManager; - if (whereClause.containsCustomExpressions()) processCustomIndexExpressions(whereClause.expressions, boundNames, secondaryIndexManager); http://git-wip-us.apache.org/repos/asf/cassandra/blob/39286654/src/java/org/apache/cassandra/db/filter/RowFilter.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/db/filter/RowFilter.java b/src/java/org/apache/cassandra/db/filter/RowFilter.java index 79bdbd7..1141fd9 100644 --- a/src/java/org/apache/cassandra/db/filter/RowFilter.java +++ b/src/java/org/apache/cassandra/db/filter/RowFilter.java @@ -601,6 +601,9 @@ public abstract class RowFilter implements Iterable<RowFilter.Expression> case GTE: case GT: case NEQ: + case LIKE_PREFIX: + case LIKE_SUFFIX: + case LIKE_CONTAINS: { assert !column.isComplex() : "Only CONTAINS and CONTAINS_KEY are supported for 'complex' types"; ByteBuffer foundValue = getValue(metadata, partitionKey, row); 
http://git-wip-us.apache.org/repos/asf/cassandra/blob/39286654/src/java/org/apache/cassandra/index/sasi/SASIIndex.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/SASIIndex.java b/src/java/org/apache/cassandra/index/sasi/SASIIndex.java index d69b440..012d923 100644 --- a/src/java/org/apache/cassandra/index/sasi/SASIIndex.java +++ b/src/java/org/apache/cassandra/index/sasi/SASIIndex.java @@ -28,8 +28,10 @@ import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.config.Schema; import org.apache.cassandra.cql3.Operator; import org.apache.cassandra.db.*; +import org.apache.cassandra.db.compaction.CompactionManager; import org.apache.cassandra.db.compaction.OperationType; import org.apache.cassandra.db.filter.RowFilter; +import org.apache.cassandra.db.lifecycle.Tracker; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.partitions.PartitionIterator; import org.apache.cassandra.db.partitions.PartitionUpdate; @@ -96,7 +98,22 @@ public class SASIIndex implements Index, INotificationConsumer ColumnDefinition column = CassandraIndex.parseTarget(baseCfs.metadata, config).left; this.index = new ColumnIndex(baseCfs.metadata.getKeyValidator(), column, config); - baseCfs.getTracker().subscribe(this); + Tracker tracker = baseCfs.getTracker(); + tracker.subscribe(this); + + SortedMap<SSTableReader, Map<ColumnDefinition, ColumnIndex>> toRebuild = new TreeMap<>((a, b) + -> Integer.compare(a.descriptor.generation, b.descriptor.generation)); + + for (SSTableReader sstable : index.init(tracker.getView().liveSSTables())) + { + Map<ColumnDefinition, ColumnIndex> perSSTable = toRebuild.get(sstable); + if (perSSTable == null) + toRebuild.put(sstable, (perSSTable = new HashMap<>())); + + perSSTable.put(index.getDefinition(), index); + } + + CompactionManager.instance.submitIndexBuild(new SASIIndexBuilder(baseCfs, toRebuild)); } public static Map<String, 
String> validateOptions(Map<String, String> options) @@ -164,7 +181,7 @@ public class SASIIndex implements Index, INotificationConsumer public boolean supportsExpression(ColumnDefinition column, Operator operator) { - return dependsOn(column); + return dependsOn(column) && index.supports(operator); } public AbstractType<?> customExpressionValueType() http://git-wip-us.apache.org/repos/asf/cassandra/blob/39286654/src/java/org/apache/cassandra/index/sasi/conf/ColumnIndex.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/conf/ColumnIndex.java b/src/java/org/apache/cassandra/index/sasi/conf/ColumnIndex.java index 0bc9a96..3363d21 100644 --- a/src/java/org/apache/cassandra/index/sasi/conf/ColumnIndex.java +++ b/src/java/org/apache/cassandra/index/sasi/conf/ColumnIndex.java @@ -25,6 +25,7 @@ import java.util.Set; import java.util.concurrent.atomic.AtomicReference; import org.apache.cassandra.config.ColumnDefinition; +import org.apache.cassandra.cql3.Operator; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.AsciiType; @@ -175,6 +176,11 @@ public class ColumnIndex return isIndexed() ? 
mode.isLiteral : (validator instanceof UTF8Type || validator instanceof AsciiType); } + public boolean supports(Operator operator) + { + return mode.supports(Expression.Op.valueOf(operator)); + } + public static ByteBuffer getValueOf(ColumnDefinition column, Row row, int nowInSecs) { switch (column.kind) http://git-wip-us.apache.org/repos/asf/cassandra/blob/39286654/src/java/org/apache/cassandra/index/sasi/conf/IndexMode.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/conf/IndexMode.java b/src/java/org/apache/cassandra/index/sasi/conf/IndexMode.java index 628a6ce..41ed718 100644 --- a/src/java/org/apache/cassandra/index/sasi/conf/IndexMode.java +++ b/src/java/org/apache/cassandra/index/sasi/conf/IndexMode.java @@ -32,6 +32,7 @@ import org.apache.cassandra.db.marshal.AbstractType; import org.apache.cassandra.db.marshal.AsciiType; import org.apache.cassandra.db.marshal.UTF8Type; import org.apache.cassandra.exceptions.ConfigurationException; +import org.apache.cassandra.index.sasi.plan.Expression.Op; import org.apache.cassandra.schema.IndexMetadata; import org.slf4j.Logger; @@ -166,4 +167,9 @@ public class IndexMode return new IndexMode(mode, isLiteral, isAnalyzed, analyzerClass, maxMemMb); } + + public boolean supports(Op operator) + { + return !(isLiteral && operator == Op.RANGE) && mode.supports(operator); + } } http://git-wip-us.apache.org/repos/asf/cassandra/blob/39286654/src/java/org/apache/cassandra/index/sasi/conf/view/PrefixTermTree.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/conf/view/PrefixTermTree.java b/src/java/org/apache/cassandra/index/sasi/conf/view/PrefixTermTree.java index 72b6daf..f7cd942 100644 --- a/src/java/org/apache/cassandra/index/sasi/conf/view/PrefixTermTree.java +++ b/src/java/org/apache/cassandra/index/sasi/conf/view/PrefixTermTree.java @@ -48,10 +48,11 @@ public class 
PrefixTermTree extends RangeTermTree public PrefixTermTree(ByteBuffer min, ByteBuffer max, Trie<ByteBuffer, Set<SSTableIndex>> trie, - IntervalTree<ByteBuffer, SSTableIndex, Interval<ByteBuffer, SSTableIndex>> ranges, - OnDiskIndexBuilder.Mode mode) + IntervalTree<Term, SSTableIndex, Interval<Term, SSTableIndex>> ranges, + OnDiskIndexBuilder.Mode mode, + AbstractType<?> comparator) { - super(min, max, ranges); + super(min, max, ranges, comparator); this.mode = mode; this.trie = trie; @@ -64,7 +65,6 @@ public class PrefixTermTree extends RangeTermTree Set<SSTableIndex> view = new HashSet<>(indexes.size()); indexes.values().forEach(view::addAll); - return Sets.union(view, super.search(e)); } @@ -76,6 +76,7 @@ public class PrefixTermTree extends RangeTermTree { super(mode, comparator); trie = new PatriciaTrie<>(new ByteBufferKeyAnalyzer(comparator)); + } public void addIndex(SSTableIndex index) @@ -87,7 +88,7 @@ public class PrefixTermTree extends RangeTermTree public TermTree build() { - return new PrefixTermTree(min, max, trie, IntervalTree.build(intervals), mode); + return new PrefixTermTree(min, max, trie, IntervalTree.build(intervals), mode, comparator); } private void addTerm(ByteBuffer term, SSTableIndex index) http://git-wip-us.apache.org/repos/asf/cassandra/blob/39286654/src/java/org/apache/cassandra/index/sasi/conf/view/RangeTermTree.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/conf/view/RangeTermTree.java b/src/java/org/apache/cassandra/index/sasi/conf/view/RangeTermTree.java index 62e5636..d6b4551 100644 --- a/src/java/org/apache/cassandra/index/sasi/conf/view/RangeTermTree.java +++ b/src/java/org/apache/cassandra/index/sasi/conf/view/RangeTermTree.java @@ -33,13 +33,15 @@ import org.apache.cassandra.utils.IntervalTree; public class RangeTermTree implements TermTree { protected final ByteBuffer min, max; - protected final IntervalTree<ByteBuffer, SSTableIndex, 
Interval<ByteBuffer, SSTableIndex>> rangeTree; + protected final IntervalTree<Term, SSTableIndex, Interval<Term, SSTableIndex>> rangeTree; + protected final AbstractType<?> comparator; - public RangeTermTree(ByteBuffer min, ByteBuffer max, IntervalTree<ByteBuffer, SSTableIndex, Interval<ByteBuffer, SSTableIndex>> rangeTree) + public RangeTermTree(ByteBuffer min, ByteBuffer max, IntervalTree<Term, SSTableIndex, Interval<Term, SSTableIndex>> rangeTree, AbstractType<?> comparator) { this.min = min; this.max = max; this.rangeTree = rangeTree; + this.comparator = comparator; } public Set<SSTableIndex> search(Expression e) @@ -47,7 +49,9 @@ public class RangeTermTree implements TermTree ByteBuffer minTerm = e.lower == null ? min : e.lower.value; ByteBuffer maxTerm = e.upper == null ? max : e.upper.value; - return new HashSet<>(rangeTree.search(Interval.create(minTerm, maxTerm, (SSTableIndex) null))); + return new HashSet<>(rangeTree.search(Interval.create(new Term(minTerm, comparator), + new Term(maxTerm, comparator), + (SSTableIndex) null))); } public int intervalCount() @@ -57,7 +61,7 @@ public class RangeTermTree implements TermTree static class Builder extends TermTree.Builder { - protected final List<Interval<ByteBuffer, SSTableIndex>> intervals = new ArrayList<>(); + protected final List<Interval<Term, SSTableIndex>> intervals = new ArrayList<>(); protected Builder(OnDiskIndexBuilder.Mode mode, AbstractType<?> comparator) { @@ -66,12 +70,36 @@ public class RangeTermTree implements TermTree public void addIndex(SSTableIndex index) { - intervals.add(Interval.create(index.minTerm(), index.maxTerm(), index)); + intervals.add(Interval.create(new Term(index.minTerm(), comparator), + new Term(index.maxTerm(), comparator), index)); } + public TermTree build() { - return new RangeTermTree(min, max, IntervalTree.build(intervals)); + return new RangeTermTree(min, max, IntervalTree.build(intervals), comparator); + } + } + + + /** + * This is required since IntervalTree doesn't 
support custom Comparator + * implementations and relied on items to be comparable which "raw" terms are not. + */ + protected static class Term implements Comparable<Term> + { + private final ByteBuffer term; + private final AbstractType<?> comparator; + + public Term(ByteBuffer term, AbstractType<?> comparator) + { + this.term = term; + this.comparator = comparator; + } + + public int compareTo(Term o) + { + return comparator.compare(term, o.term); } } } http://git-wip-us.apache.org/repos/asf/cassandra/blob/39286654/src/java/org/apache/cassandra/index/sasi/conf/view/View.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/conf/view/View.java b/src/java/org/apache/cassandra/index/sasi/conf/view/View.java index 378c3c6..505a4d7 100644 --- a/src/java/org/apache/cassandra/index/sasi/conf/view/View.java +++ b/src/java/org/apache/cassandra/index/sasi/conf/view/View.java @@ -20,6 +20,7 @@ package org.apache.cassandra.index.sasi.conf.view; import java.nio.ByteBuffer; import java.util.*; +import org.apache.cassandra.db.marshal.UUIDType; import org.apache.cassandra.index.sasi.SSTableIndex; import org.apache.cassandra.index.sasi.conf.ColumnIndex; import org.apache.cassandra.index.sasi.plan.Expression; @@ -39,7 +40,8 @@ public class View implements Iterable<SSTableIndex> private final Map<Descriptor, SSTableIndex> view; private final TermTree termTree; - private final IntervalTree<ByteBuffer, SSTableIndex, Interval<ByteBuffer, SSTableIndex>> keyIntervalTree; + private final AbstractType<?> keyValidator; + private final IntervalTree<Key, SSTableIndex, Interval<Key, SSTableIndex>> keyIntervalTree; public View(ColumnIndex index, Set<SSTableIndex> indexes) { @@ -58,7 +60,7 @@ public class View implements Iterable<SSTableIndex> ? 
new PrefixTermTree.Builder(index.getMode().mode, validator) : new RangeTermTree.Builder(index.getMode().mode, validator); - List<Interval<ByteBuffer, SSTableIndex>> keyIntervals = new ArrayList<>(); + List<Interval<Key, SSTableIndex>> keyIntervals = new ArrayList<>(); for (SSTableIndex sstableIndex : Iterables.concat(currentView, newIndexes)) { SSTableReader sstable = sstableIndex.getSSTable(); @@ -71,11 +73,14 @@ public class View implements Iterable<SSTableIndex> newView.put(sstable.descriptor, sstableIndex); termTreeBuilder.add(sstableIndex); - keyIntervals.add(Interval.create(sstableIndex.minKey(), sstableIndex.maxKey(), sstableIndex)); + keyIntervals.add(Interval.create(new Key(sstableIndex.minKey(), index.keyValidator()), + new Key(sstableIndex.maxKey(), index.keyValidator()), + sstableIndex)); } this.view = newView; this.termTree = termTreeBuilder.build(); + this.keyValidator = index.keyValidator(); this.keyIntervalTree = IntervalTree.build(keyIntervals); if (keyIntervalTree.intervalCount() != termTree.intervalCount()) @@ -89,7 +94,7 @@ public class View implements Iterable<SSTableIndex> public List<SSTableIndex> match(ByteBuffer minKey, ByteBuffer maxKey) { - return keyIntervalTree.search(Interval.create(minKey, maxKey, (SSTableIndex) null)); + return keyIntervalTree.search(Interval.create(new Key(minKey, keyValidator), new Key(maxKey, keyValidator), (SSTableIndex) null)); } public Iterator<SSTableIndex> iterator() @@ -101,4 +106,25 @@ public class View implements Iterable<SSTableIndex> { return view.values(); } + + /** + * This is required since IntervalTree doesn't support custom Comparator + * implementations and relied on items to be comparable which "raw" keys are not. 
+ */ + private static class Key implements Comparable<Key> + { + private final ByteBuffer key; + private final AbstractType<?> comparator; + + public Key(ByteBuffer key, AbstractType<?> comparator) + { + this.key = key; + this.comparator = comparator; + } + + public int compareTo(Key o) + { + return comparator.compare(key, o.key); + } + } } http://git-wip-us.apache.org/repos/asf/cassandra/blob/39286654/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java b/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java index 0f9e389..a84e8bf 100644 --- a/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java +++ b/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java @@ -206,6 +206,16 @@ public class OnDiskIndex implements Iterable<OnDiskIndex.DataTerm>, Closeable */ public RangeIterator<Long, Token> search(Expression exp) { + assert mode.supports(exp.getOp()); + + // optimization in case single term is requested from index + // we don't really need to build additional union iterator + if (exp.getOp() == Op.EQ) + { + DataTerm term = getTerm(exp.lower.value); + return term == null ? null : term.getTokens(); + } + // convert single NOT_EQ to range with exclusion final Expression expression = (exp.getOp() != Op.NOT_EQ) ? exp @@ -426,6 +436,12 @@ public class OnDiskIndex implements Iterable<OnDiskIndex.DataTerm>, Closeable return ptr; } + private DataTerm getTerm(ByteBuffer query) + { + SearchResult<DataTerm> term = searchIndex(query, getDataBlock(query)); + return term.cmp == 0 ? 
term.result : null; + } + private SearchResult<DataTerm> searchIndex(ByteBuffer query, int blockIdx) { return dataLevel.getBlock(blockIdx).search(comparator, query); http://git-wip-us.apache.org/repos/asf/cassandra/blob/39286654/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndexBuilder.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndexBuilder.java b/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndexBuilder.java index 7b8f5c9..20a8739 100644 --- a/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndexBuilder.java +++ b/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndexBuilder.java @@ -23,6 +23,7 @@ import java.nio.ByteBuffer; import java.util.*; import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.index.sasi.plan.Expression.Op; import org.apache.cassandra.index.sasi.sa.IntegralSA; import org.apache.cassandra.index.sasi.sa.SA; import org.apache.cassandra.index.sasi.sa.TermIterator; @@ -49,12 +50,26 @@ public class OnDiskIndexBuilder public enum Mode { - PREFIX, CONTAINS, SPARSE; + PREFIX(EnumSet.of(Op.EQ, Op.PREFIX, Op.NOT_EQ, Op.RANGE)), + CONTAINS(EnumSet.of(Op.EQ, Op.CONTAINS, Op.SUFFIX, Op.NOT_EQ)), + SPARSE(EnumSet.of(Op.EQ, Op.NOT_EQ, Op.RANGE)); + + Set<Op> supportedOps; + + Mode(Set<Op> ops) + { + supportedOps = ops; + } public static Mode mode(String mode) { return Mode.valueOf(mode.toUpperCase()); } + + public boolean supports(Op op) + { + return supportedOps.contains(op); + } } public enum TermSize http://git-wip-us.apache.org/repos/asf/cassandra/blob/39286654/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java b/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java index e4ee6eb..99a417a 100644 --- 
a/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java +++ b/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java @@ -18,6 +18,7 @@ package org.apache.cassandra.index.sasi.memory; import java.nio.ByteBuffer; +import java.util.Collections; import java.util.List; import java.util.concurrent.ConcurrentSkipListSet; @@ -27,6 +28,7 @@ import org.apache.cassandra.index.sasi.conf.ColumnIndex; import org.apache.cassandra.index.sasi.disk.OnDiskIndexBuilder; import org.apache.cassandra.index.sasi.disk.Token; import org.apache.cassandra.index.sasi.plan.Expression; +import org.apache.cassandra.index.sasi.plan.Expression.Op; import org.apache.cassandra.index.sasi.analyzer.AbstractAnalyzer; import org.apache.cassandra.index.sasi.utils.RangeUnionIterator; import org.apache.cassandra.index.sasi.utils.RangeIterator; @@ -136,11 +138,9 @@ public class TrieMemIndex extends MemIndex public RangeIterator<Long, Token> search(Expression expression) { - assert expression.getOp() == Expression.Op.EQ; // means that min == max - ByteBuffer prefix = expression.lower == null ? 
null : expression.lower.value; - Iterable<ConcurrentSkipListSet<DecoratedKey>> search = search(definition.cellValueType().getString(prefix)); + Iterable<ConcurrentSkipListSet<DecoratedKey>> search = search(expression.getOp(), definition.cellValueType().getString(prefix)); RangeUnionIterator.Builder<Long, Token> builder = RangeUnionIterator.builder(); for (ConcurrentSkipListSet<DecoratedKey> keys : search) @@ -153,7 +153,7 @@ public class TrieMemIndex extends MemIndex } protected abstract ConcurrentSkipListSet<DecoratedKey> get(String value); - protected abstract Iterable<ConcurrentSkipListSet<DecoratedKey>> search(String value); + protected abstract Iterable<ConcurrentSkipListSet<DecoratedKey>> search(Op operator, String value); protected abstract ConcurrentSkipListSet<DecoratedKey> putIfAbsent(String value, ConcurrentSkipListSet<DecoratedKey> key); } @@ -177,9 +177,20 @@ public class TrieMemIndex extends MemIndex return trie.putIfAbsent(value, newKeys); } - public Iterable<ConcurrentSkipListSet<DecoratedKey>> search(String value) + public Iterable<ConcurrentSkipListSet<DecoratedKey>> search(Op operator, String value) { - return trie.getValuesForKeysStartingWith(value); + switch (operator) + { + case EQ: + ConcurrentSkipListSet<DecoratedKey> keys = trie.getValueForExactKey(value); + return keys == null ? 
Collections.emptyList() : Collections.singletonList(keys); + + case PREFIX: + return trie.getValuesForKeysStartingWith(value); + + default: + throw new UnsupportedOperationException(String.format("operation %s is not supported.", operator)); + } } } @@ -203,9 +214,23 @@ public class TrieMemIndex extends MemIndex return trie.putIfAbsent(value, newKeys); } - public Iterable<ConcurrentSkipListSet<DecoratedKey>> search(String value) + public Iterable<ConcurrentSkipListSet<DecoratedKey>> search(Op operator, String value) { - return trie.getValuesForKeysContaining(value); + switch (operator) + { + case EQ: + ConcurrentSkipListSet<DecoratedKey> keys = trie.getValueForExactKey(value); + return keys == null ? Collections.emptyList() : Collections.singletonList(keys); + + case SUFFIX: + return trie.getValuesForKeysEndingWith(value); + + case CONTAINS: + return trie.getValuesForKeysContaining(value); + + default: + throw new UnsupportedOperationException(String.format("operation %s is not supported.", operator)); + } } } http://git-wip-us.apache.org/repos/asf/cassandra/blob/39286654/src/java/org/apache/cassandra/index/sasi/plan/Expression.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/plan/Expression.java b/src/java/org/apache/cassandra/index/sasi/plan/Expression.java index e215ec7..43f8251 100644 --- a/src/java/org/apache/cassandra/index/sasi/plan/Expression.java +++ b/src/java/org/apache/cassandra/index/sasi/plan/Expression.java @@ -46,7 +46,37 @@ public class Expression public enum Op { - EQ, NOT_EQ, RANGE + EQ, PREFIX, SUFFIX, CONTAINS, NOT_EQ, RANGE; + + public static Op valueOf(Operator operator) + { + switch (operator) + { + case EQ: + return EQ; + + case NEQ: + return NOT_EQ; + + case LT: + case GT: + case LTE: + case GTE: + return RANGE; + + case LIKE_PREFIX: + return PREFIX; + + case LIKE_SUFFIX: + return SUFFIX; + + case LIKE_CONTAINS: + return CONTAINS; + + default: + throw new 
IllegalArgumentException("unknown operator: " + operator); + } + } } private final QueryController controller; @@ -107,10 +137,13 @@ public class Expression boolean lowerInclusive = false, upperInclusive = false; switch (op) { + case LIKE_PREFIX: + case LIKE_SUFFIX: + case LIKE_CONTAINS: case EQ: lower = new Bound(value, true); upper = lower; - operation = Op.EQ; + operation = Op.valueOf(op); break; case NEQ: @@ -151,7 +184,7 @@ public class Expression return this; } - public boolean contains(ByteBuffer value) + public boolean isSatisfiedBy(ByteBuffer value) { if (!TypeUtil.isValid(value, validator)) { @@ -224,7 +257,31 @@ public class Expression while (analyzer.hasNext()) { ByteBuffer term = analyzer.next(); - if (ByteBufferUtil.contains(term, requestedValue)) + + boolean isMatch = false; + switch (operation) + { + case EQ: + // Operation.isSatisfiedBy handles conclusion on !=, + // here we just need to make sure that term matched it + case NOT_EQ: + isMatch = validator.compare(term, requestedValue) == 0; + break; + + case PREFIX: + isMatch = ByteBufferUtil.startsWith(term, requestedValue); + break; + + case SUFFIX: + isMatch = ByteBufferUtil.endsWith(term, requestedValue); + break; + + case CONTAINS: + isMatch = ByteBufferUtil.contains(term, requestedValue); + break; + } + + if (isMatch) return true; } http://git-wip-us.apache.org/repos/asf/cassandra/blob/39286654/src/java/org/apache/cassandra/index/sasi/plan/Operation.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/index/sasi/plan/Operation.java b/src/java/org/apache/cassandra/index/sasi/plan/Operation.java index 1857c56..28bcc51 100644 --- a/src/java/org/apache/cassandra/index/sasi/plan/Operation.java +++ b/src/java/org/apache/cassandra/index/sasi/plan/Operation.java @@ -160,7 +160,7 @@ public class Operation extends RangeIterator<Long, Token> * * The algorithm is as follows: for every given expression from analyzed * list get corresponding 
column from the Row: - * - apply {@link Expression#contains(ByteBuffer)} + * - apply {@link Expression#isSatisfiedBy(ByteBuffer)} * method to figure out if it's satisfied; * - apply logical operation between boolean accumulator and current boolean result; * - if result == false and node's operation is AND return right away; @@ -225,7 +225,7 @@ public class Operation extends RangeIterator<Long, Token> for (int i = filters.size() - 1; i >= 0; i--) { Expression expression = filters.get(i); - isMatch = !isMissingColumn && expression.contains(value); + isMatch = !isMissingColumn && expression.isSatisfiedBy(value); if (expression.getOp() == Op.NOT_EQ) { // since this is NOT_EQ operation we have to @@ -282,16 +282,29 @@ public class Operation extends RangeIterator<Long, Token> AbstractAnalyzer analyzer = columnIndex.getAnalyzer(); analyzer.reset(e.getIndexValue()); - // EQ/NOT_EQ can have multiple expressions e.g. text = "Hello World", + // EQ/LIKE_*/NOT_EQ can have multiple expressions e.g. text = "Hello World", // becomes text = "Hello" OR text = "World" because "space" is always interpreted as a split point (by analyzer), // NOT_EQ is made an independent expression only in case of pre-existing multiple EQ expressions, or // if there is no EQ operations and NOT_EQ is met or a single NOT_EQ expression present, // in such case we know exactly that there would be no more EQ/RANGE expressions for given column // since NOT_EQ has the lowest priority. 
- if (e.operator() == Operator.EQ - || (e.operator() == Operator.NEQ - && (perColumn.size() == 0 || perColumn.size() > 1 - || (perColumn.size() == 1 && perColumn.get(0).getOp() == Op.NOT_EQ)))) + boolean isMultiExpression = false; + switch (e.operator()) + { + case EQ: + case LIKE_PREFIX: + case LIKE_SUFFIX: + case LIKE_CONTAINS: + isMultiExpression = true; + break; + + case NEQ: + isMultiExpression = (perColumn.size() == 0 || perColumn.size() > 1 + || (perColumn.size() == 1 && perColumn.get(0).getOp() == Op.NOT_EQ)); + break; + } + + if (isMultiExpression) { while (analyzer.hasNext()) { @@ -323,6 +336,11 @@ public class Operation extends RangeIterator<Long, Token> switch (op) { case EQ: + return 5; + + case LIKE_PREFIX: + case LIKE_SUFFIX: + case LIKE_CONTAINS: return 4; case GTE: http://git-wip-us.apache.org/repos/asf/cassandra/blob/39286654/src/java/org/apache/cassandra/utils/ByteBufferUtil.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/utils/ByteBufferUtil.java b/src/java/org/apache/cassandra/utils/ByteBufferUtil.java index fcda9ba..87e8b9d 100644 --- a/src/java/org/apache/cassandra/utils/ByteBufferUtil.java +++ b/src/java/org/apache/cassandra/utils/ByteBufferUtil.java @@ -710,4 +710,36 @@ public class ByteBufferUtil } return false; } + + public static boolean startsWith(ByteBuffer src, ByteBuffer prefix) + { + return startsWith(src, prefix, 0); + } + + public static boolean endsWith(ByteBuffer src, ByteBuffer suffix) + { + return startsWith(src, suffix, src.remaining() - suffix.remaining()); + } + + private static boolean startsWith(ByteBuffer src, ByteBuffer prefix, int offset) + { + if (offset < 0) + return false; + + int sPos = src.position() + offset; + int pPos = prefix.position(); + + if (src.remaining() - offset < prefix.remaining()) + return false; + + int len = Math.min(src.remaining() - offset, prefix.remaining()); + + while (len-- > 0) + { + if (src.get(sPos++) != 
prefix.get(pPos++)) + return false; + } + + return true; + } } http://git-wip-us.apache.org/repos/asf/cassandra/blob/39286654/test/unit/org/apache/cassandra/SchemaLoader.java ---------------------------------------------------------------------- diff --git a/test/unit/org/apache/cassandra/SchemaLoader.java b/test/unit/org/apache/cassandra/SchemaLoader.java index 51db4cd..9a8c424 100644 --- a/test/unit/org/apache/cassandra/SchemaLoader.java +++ b/test/unit/org/apache/cassandra/SchemaLoader.java @@ -541,6 +541,7 @@ public class SchemaLoader {{ put(IndexTarget.CUSTOM_INDEX_OPTION_NAME, SASIIndex.class.getName()); put(IndexTarget.TARGET_OPTION_NAME, "comment"); + put("mode", OnDiskIndexBuilder.Mode.CONTAINS.toString()); put("analyzed", "true"); }})) .with(IndexMetadata.fromSchemaMetadata("comment_suffix_split", IndexMetadata.Kind.CUSTOM, new HashMap<String, String>()