Integrate SASI index into Cassandra patch by xedin; reviewed by beobal for CASSANDRA-10661
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/72790dc8 Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/72790dc8 Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/72790dc8 Branch: refs/heads/trunk Commit: 72790dc8e34826b39ac696b03025ae6b7b6beb2b Parents: 11c8ca6 Author: Pavel Yaskevich <xe...@apache.org> Authored: Wed Dec 2 19:23:54 2015 -0800 Committer: Pavel Yaskevich <xe...@apache.org> Committed: Sat Jan 23 19:35:29 2016 -0800 ---------------------------------------------------------------------- CHANGES.txt | 1 + build.xml | 22 +- doc/SASI.md | 768 + lib/concurrent-trees-2.4.0.jar | Bin 0 -> 118696 bytes lib/hppc-0.5.4.jar | Bin 0 -> 1305173 bytes lib/jflex-1.6.0.jar | Bin 0 -> 1048690 bytes lib/licenses/concurrent-trees-2.4.0.txt | 201 + lib/licenses/hppc-0.5.4.txt | 202 + lib/licenses/jflex-1.6.0.txt | 201 + lib/licenses/primitive-1.0.txt | 201 + lib/licenses/snowball-stemmer-1.3.0.581.1.txt | 201 + lib/primitive-1.0.jar | Bin 0 -> 52589 bytes lib/snowball-stemmer-1.3.0.581.1.jar | Bin 0 -> 93019 bytes .../cassandra/config/DatabaseDescriptor.java | 7 +- .../org/apache/cassandra/db/ColumnIndex.java | 6 +- .../apache/cassandra/db/filter/RowFilter.java | 15 +- .../cassandra/index/SecondaryIndexManager.java | 11 + .../apache/cassandra/index/sasi/SASIIndex.java | 288 + .../cassandra/index/sasi/SASIIndexBuilder.java | 128 + .../cassandra/index/sasi/SSTableIndex.java | 187 + .../org/apache/cassandra/index/sasi/Term.java | 65 + .../cassandra/index/sasi/TermIterator.java | 208 + .../index/sasi/analyzer/AbstractAnalyzer.java | 51 + .../index/sasi/analyzer/NoOpAnalyzer.java | 54 + .../sasi/analyzer/NonTokenizingAnalyzer.java | 126 + .../sasi/analyzer/NonTokenizingOptions.java | 147 + .../sasi/analyzer/SUPPLEMENTARY.jflex-macro | 143 + .../index/sasi/analyzer/StandardAnalyzer.java | 194 + .../sasi/analyzer/StandardTokenizerImpl.jflex | 220 + .../analyzer/StandardTokenizerInterface.java | 65 + .../sasi/analyzer/StandardTokenizerOptions.java | 272 + .../analyzer/filter/BasicResultFilters.java | 76 + .../analyzer/filter/FilterPipelineBuilder.java | 51 + .../analyzer/filter/FilterPipelineExecutor.java | 53 + .../analyzer/filter/FilterPipelineTask.java | 52 + .../sasi/analyzer/filter/StemmerFactory.java | 101 + .../sasi/analyzer/filter/StemmingFilters.java | 46 + .../sasi/analyzer/filter/StopWordFactory.java | 100 + .../sasi/analyzer/filter/StopWordFilters.java | 42 + .../cassandra/index/sasi/conf/ColumnIndex.java | 193 + .../cassandra/index/sasi/conf/DataTracker.java | 162 + .../cassandra/index/sasi/conf/IndexMode.java | 169 + .../index/sasi/conf/view/PrefixTermTree.java | 194 + .../index/sasi/conf/view/RangeTermTree.java | 77 + .../index/sasi/conf/view/TermTree.java | 58 + .../cassandra/index/sasi/conf/view/View.java | 104 + .../cassandra/index/sasi/disk/Descriptor.java | 51 + .../cassandra/index/sasi/disk/OnDiskBlock.java | 142 + .../cassandra/index/sasi/disk/OnDiskIndex.java | 773 ++ .../index/sasi/disk/OnDiskIndexBuilder.java | 627 + .../index/sasi/disk/PerSSTableIndexWriter.java | 361 + .../apache/cassandra/index/sasi/disk/Token.java | 42 + .../cassandra/index/sasi/disk/TokenTree.java | 519 + .../index/sasi/disk/TokenTreeBuilder.java | 839 ++ .../exceptions/TimeQuotaExceededException.java | 21 + .../index/sasi/memory/IndexMemtable.java | 71 + .../index/sasi/memory/KeyRangeIterator.java | 118 + .../cassandra/index/sasi/memory/MemIndex.java | 51 + .../index/sasi/memory/SkipListMemIndex.java | 97 + .../index/sasi/memory/TrieMemIndex.java | 254 + .../cassandra/index/sasi/plan/Expression.java | 340 + .../cassandra/index/sasi/plan/Operation.java | 477 + .../index/sasi/plan/QueryController.java | 261 + .../cassandra/index/sasi/plan/QueryPlan.java | 170 + .../cassandra/index/sasi/sa/ByteTerm.java | 51 + .../cassandra/index/sasi/sa/CharTerm.java | 54 + .../cassandra/index/sasi/sa/IntegralSA.java | 84 + .../org/apache/cassandra/index/sasi/sa/SA.java | 58 + .../cassandra/index/sasi/sa/SuffixSA.java | 143 + .../apache/cassandra/index/sasi/sa/Term.java | 58 + .../cassandra/index/sasi/sa/TermIterator.java | 31 + .../index/sasi/utils/AbstractIterator.java | 155 + .../index/sasi/utils/CombinedTerm.java | 103 + .../index/sasi/utils/CombinedTermIterator.java | 87 + .../index/sasi/utils/CombinedValue.java | 25 + .../index/sasi/utils/MappedBuffer.java | 253 + .../index/sasi/utils/OnDiskIndexIterator.java | 64 + .../sasi/utils/RangeIntersectionIterator.java | 281 + .../index/sasi/utils/RangeIterator.java | 279 + .../index/sasi/utils/RangeUnionIterator.java | 158 + .../cassandra/index/sasi/utils/TypeUtil.java | 84 + .../sasi/utils/trie/AbstractPatriciaTrie.java | 1151 ++ .../index/sasi/utils/trie/AbstractTrie.java | 230 + .../cassandra/index/sasi/utils/trie/Cursor.java | 83 + .../index/sasi/utils/trie/KeyAnalyzer.java | 73 + .../index/sasi/utils/trie/PatriciaTrie.java | 1261 ++ .../cassandra/index/sasi/utils/trie/Trie.java | 152 + .../cassandra/index/sasi/utils/trie/Tries.java | 95 + .../apache/cassandra/io/sstable/Component.java | 13 +- .../cassandra/io/sstable/KeyIterator.java | 7 + .../io/sstable/format/SSTableFlushObserver.java | 12 +- .../io/sstable/format/SSTableReader.java | 20 + .../io/sstable/format/SSTableWriter.java | 9 +- .../io/util/DataOutputBufferFixed.java | 5 + .../cassandra/io/util/SequentialWriter.java | 7 + .../apache/cassandra/utils/ByteBufferUtil.java | 41 + .../org/apache/cassandra/utils/FBUtilities.java | 5 + .../cassandra/utils/memory/MemoryUtil.java | 5 +- .../index/sasi/analyzer/filter/ar_ST.txt | 163 + .../index/sasi/analyzer/filter/bg_ST.txt | 260 + .../index/sasi/analyzer/filter/cs_ST.txt | 257 + .../index/sasi/analyzer/filter/de_ST.txt | 604 + .../index/sasi/analyzer/filter/en_ST.txt | 572 + .../index/sasi/analyzer/filter/es_ST.txt | 308 + .../index/sasi/analyzer/filter/fi_ST.txt | 748 + .../index/sasi/analyzer/filter/fr_ST.txt | 464 + .../index/sasi/analyzer/filter/hi_ST.txt | 164 + .../index/sasi/analyzer/filter/hu_ST.txt | 738 + .../index/sasi/analyzer/filter/it_ST.txt | 400 + .../index/sasi/analyzer/filter/pl_ST.txt | 139 + .../index/sasi/analyzer/filter/pt_ST.txt | 357 + .../index/sasi/analyzer/filter/ro_ST.txt | 283 + .../index/sasi/analyzer/filter/ru_ST.txt | 423 + .../index/sasi/analyzer/filter/sv_ST.txt | 387 + test/conf/cassandra-murmur.yaml | 43 + ...dventures_of_huckleberry_finn_mark_twain.txt | 12361 +++++++++++++++++ .../tokenization/apache_license_header.txt | 16 + test/resources/tokenization/ja_jp_1.txt | 1 + test/resources/tokenization/ja_jp_2.txt | 2 + test/resources/tokenization/lorem_ipsum.txt | 1 + test/resources/tokenization/ru_ru_1.txt | 19 + .../tokenization/top_visited_domains.txt | 3 + test/resources/tokenization/zn_tw_1.txt | 19 + .../unit/org/apache/cassandra/SchemaLoader.java | 139 + .../apache/cassandra/cql3/KeyCacheCqlTest.java | 14 +- .../cassandra/index/sasi/SASIIndexTest.java | 1852 +++ .../analyzer/NonTokenizingAnalyzerTest.java | 78 + .../sasi/analyzer/StandardAnalyzerTest.java | 196 + .../index/sasi/disk/OnDiskIndexTest.java | 856 ++ .../sasi/disk/PerSSTableIndexWriterTest.java | 161 + .../index/sasi/disk/TokenTreeTest.java | 535 + .../index/sasi/plan/OperationTest.java | 645 + .../index/sasi/utils/LongIterator.java | 106 + .../index/sasi/utils/MappedBufferTest.java | 540 + .../utils/RangeIntersectionIteratorTest.java | 387 + .../sasi/utils/RangeUnionIteratorTest.java | 306 + .../format/SSTableFlushObserverTest.java | 5 +- 137 files changed, 40339 insertions(+), 26 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/72790dc8/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index 650bf5f..9309ef4 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 3.4 + * Integrate SASI index into Cassandra (CASSANDRA-10661) * Add --skip-flush option to nodetool snapshot * Skip values for non-queried columns (CASSANDRA-10657) * Add support for secondary indexes on static columns (CASSANDRA-8103) http://git-wip-us.apache.org/repos/asf/cassandra/blob/72790dc8/build.xml ---------------------------------------------------------------------- diff --git a/build.xml b/build.xml index f9f42f7..035628a 100644 --- a/build.xml +++ b/build.xml @@ -244,6 +244,15 @@ </target> <!-- + Generates Java sources for tokenization support from jflex + grammar files + --> + <target name="generate-jflex-java" description="Generate Java from jflex grammar"> + <taskdef classname="jflex.anttask.JFlexTask" classpath="${build.lib}/jflex-1.6.0.jar" name="jflex" /> + <jflex file="${build.src.java}/org/apache/cassandra/index/sasi/analyzer/StandardTokenizerImpl.jflex" destdir="${build.src.gen-java}/" /> + </target> + + <!-- Fetch Maven Ant Tasks and Cassandra's dependencies These targets are intentionally free of dependencies so that they can be run stand-alone from a binary release artifact. @@ -405,6 +414,11 @@ <exclusion groupId="log4j" artifactId="log4j"/> </dependency> <dependency groupId="joda-time" artifactId="joda-time" version="2.4" /> + <dependency groupId="com.carrotsearch" artifactId="hppc" version="0.5.4" /> + <dependency groupId="de.jflex" artifactId="jflex" version="1.6.0" /> + <dependency groupId="net.mintern" artifactId="primitive" version="1.0" /> + <dependency groupId="com.github.rholder" artifactId="snowball-stemmer" version="1.3.0.581.1" /> + <dependency groupId="com.googlecode.concurrent-trees" artifactId="concurrent-trees" version="2.4.0" /> </dependencyManagement> <developer id="alakshman" name="Avinash Lakshman"/> @@ -568,6 +582,12 @@ <dependency groupId="org.slf4j" artifactId="log4j-over-slf4j"/> <dependency groupId="org.slf4j" artifactId="jcl-over-slf4j"/> <dependency groupId="org.apache.thrift" artifactId="libthrift"/> + <dependency groupId="com.carrotsearch" artifactId="hppc" version="0.5.4" /> + <dependency groupId="de.jflex" artifactId="jflex" version="1.6.0" /> + <dependency groupId="net.mintern" artifactId="primitive" version="1.0" /> + <dependency groupId="com.github.rholder" artifactId="snowball-stemmer" version="1.3.0.581.1" /> + <dependency groupId="com.googlecode.concurrent-trees" artifactId="concurrent-trees" version="2.4.0" /> + </artifact:pom> <artifact:pom id="clientutil-pom" artifactId="cassandra-clientutil" @@ -734,7 +754,7 @@ depends="maven-ant-tasks-retrieve-build,build-project" description="Compile Cassandra classes"/> <target name="codecoverage" depends="jacoco-run,jacoco-report" description="Create code coverage report"/> - <target depends="init,gen-cql3-grammar,generate-cql-html" + <target depends="init,gen-cql3-grammar,generate-cql-html,generate-jflex-java" name="build-project"> <echo message="${ant.project.name}: ${ant.file}"/> <!-- Order matters! --> http://git-wip-us.apache.org/repos/asf/cassandra/blob/72790dc8/doc/SASI.md ---------------------------------------------------------------------- diff --git a/doc/SASI.md b/doc/SASI.md new file mode 100644 index 0000000..64573b8 --- /dev/null +++ b/doc/SASI.md @@ -0,0 +1,768 @@ +# SASIIndex + +[`SASIIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/SASIIndex.java), +or "SASI" for short, is an implementation of Cassandra's +`Index` interface that can be used as an alternative to the +existing implementations. SASI's indexing and querying improves on +existing implementations by tailoring it specifically to Cassandra's +needs. SASI has superior performance in cases where queries would +previously require filtering. In achieving this performance, SASI aims +to be significantly less resource intensive than existing +implementations, in memory, disk, and CPU usage. In addition, SASI +supports prefix and contains queries on strings (similar to SQL's +`LIKE = "foo*"` or `LIKE = "*foo*"'`). + +The following goes on describe how to get up and running with SASI, +demonstrates usage with examples, and provides some details on its +implementation. + +## Using SASI + +The examples below walk through creating a table and indexes on its +columns, and performing queries on some inserted data. The patchset in +this repository includes support for the Thrift and CQL3 interfaces. + +The examples below assume the `demo` keyspace has been created and is +in use. + +``` +cqlsh> CREATE KEYSPACE demo WITH replication = { + ... 'class': 'SimpleStrategy', + ... 'replication_factor': '1' + ... }; +cqlsh> USE demo; +``` + +All examples are performed on the `sasi` table: + +``` +cqlsh:demo> CREATE TABLE sasi (id uuid, first_name text, last_name text, + ... age int, height int, created_at bigint, primary key (id)); +``` + +#### Creating Indexes + +To create SASI indexes use CQLs `CREATE CUSTOM INDEX` statement: + +``` +cqlsh:demo> CREATE CUSTOM INDEX ON sasi (first_name) USING 'org.apache.cassandra.index.sasi.SASIIndex' + ... WITH OPTIONS = { + ... 'analyzer_class': + ... 'org.apache.cassandra.index.sasi.analyzer.NonTokenizingAnalyzer', + ... 'case_sensitive': 'false' + ... }; + +cqlsh:demo> CREATE CUSTOM INDEX ON sasi (last_name) USING 'org.apache.cassandra.index.sasi.SASIIndex' + ... WITH OPTIONS = {'mode': 'CONTAINS'}; + +cqlsh:demo> CREATE CUSTOM INDEX ON sasi (age) USING 'org.apache.cassandra.index.sasi.SASIIndex'; + +cqlsh:demo> CREATE CUSTOM INDEX ON sasi (created_at) USING 'org.apache.cassandra.index.sasi.SASIIndex' + ... WITH OPTIONS = {'mode': 'SPARSE'}; +``` + +The indexes created have some options specified that customize their +behaviour and potentially performance. The index on `first_name` is +case-insensitive. The analyzers are discussed more in a subsequent +example. The `NonTokenizingAnalyzer` performs no analysis on the +text. Each index has a mode: `PREFIX`, `CONTAINS`, or `SPARSE`, the +first being the default. The `last_name` index is created with the +mode `CONTAINS` which matches terms on suffixes instead of prefix +only. Examples of this are available below and more detail can be +found in the section on +[OnDiskIndex](https://github.com/xedin/sasi#ondiskindexbuilder).The +`created_at` column is created with its mode set to `SPARSE`, which is +meant to improve performance of querying large, dense number ranges +like timestamps for data inserted every millisecond. Details of the +`SPARSE` implementation can also be found in the section on the +[OnDiskIndex](https://github.com/xedin/sasi#ondiskindexbuilder). The `age` +index is created with the default `PREFIX` mode and no +case-sensitivity or text analysis options are specified since the +field is numeric. + +After inserting the following data and performing a `nodetool flush`, +SASI performing index flushes to disk can be seen in Cassandra's logs +-- although the direct call to flush is not required (see +[IndexMemtable](#indexmemtable) for more details). + +``` +cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at) + ... VALUES (556ebd54-cbe5-4b75-9aae-bf2a31a24500, 'Pavel', 'Yaskevich', 27, 181, 1442959315018); + +cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at) + ... VALUES (5770382a-c56f-4f3f-b755-450e24d55217, 'Jordan', 'West', 26, 173, 1442959315019); + +cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at) + ... VALUES (96053844-45c3-4f15-b1b7-b02c441d3ee1, 'Mikhail', 'Stepura', 36, 173, 1442959315020); + +cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at) + ... VALUES (f5dfcabe-de96-4148-9b80-a1c41ed276b4, 'Michael', 'Kjellman', 26, 180, 1442959315021); + +cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at) + ... VALUES (2970da43-e070-41a8-8bcb-35df7a0e608a, 'Johnny', 'Zhang', 32, 175, 1442959315022); + +cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at) + ... VALUES (6b757016-631d-4fdb-ac62-40b127ccfbc7, 'Jason', 'Brown', 40, 182, 1442959315023); + +cqlsh:demo> INSERT INTO sasi (id, first_name, last_name, age, height, created_at) + ... VALUES (8f909e8a-008e-49dd-8d43-1b0df348ed44, 'Vijay', 'Parthasarathy', 34, 183, 1442959315024); + +cqlsh:demo> SELECT first_name, last_name, age, height, created_at FROM sasi; + + first_name | last_name | age | height | created_at +------------+---------------+-----+--------+--------------- + Michael | Kjellman | 26 | 180 | 1442959315021 + Mikhail | Stepura | 36 | 173 | 1442959315020 + Jason | Brown | 40 | 182 | 1442959315023 + Pavel | Yaskevich | 27 | 181 | 1442959315018 + Vijay | Parthasarathy | 34 | 183 | 1442959315024 + Jordan | West | 26 | 173 | 1442959315019 + Johnny | Zhang | 32 | 175 | 1442959315022 + +(7 rows) +``` + +#### Equality & Prefix Queries + +SASI supports simple queries already supported by CQL, however, for +text fields like `first_name` equals queries perform prefix searches +-- this is similar to `first_name LIKE 'M*'` in SQL (excluding case +sensitivity, which is dependent on the index configuration). The +semantics of CQL's `=` were modified instead of making further +modifications of the grammar with the introduction of a `LIKE` +operator. Ideally, CQL would be modified to include such an operator, +supporting both prefix and suffix searches. + +``` +cqlsh:demo> SELECT first_name, last_name, age, height, created_at FROM sasi + ... WHERE first_name = 'M'; + + first_name | last_name | age | height | created_at +------------+-----------+-----+--------+--------------- + Michael | Kjellman | 26 | 180 | 1442959315021 + Mikhail | Stepura | 36 | 173 | 1442959315020 + +(2 rows) +``` + +Of course, the case of the query does not matter for the `first_name` +column because of the options provided at index creation time. + +``` +cqlsh:demo> SELECT first_name, last_name, age, height, created_at FROM sasi + ... WHERE first_name = 'm'; + + first_name | last_name | age | height | created_at +------------+-----------+-----+--------+--------------- + Michael | Kjellman | 26 | 180 | 1442959315021 + Mikhail | Stepura | 36 | 173 | 1442959315020 + +(2 rows) +``` + +#### Compound Queries + +SASI supports queries with multiple predicates, however, due to the +nature of the default indexing implementation, CQL requires the user +to specify `ALLOW FILTERING` to opt-in to the potential performance +pitfalls of such a query. With SASI, while the requirement to include +`ALLOW FILTERING` remains, to reduce modifications to the grammar, the +performance pitfalls do not exist because filtering is not +performed. Details on how SASI joins data from multiple predicates is +available below in the +[Implementation Details](https://github.com/xedin/sasi#implementation-details) +section. + +``` +cqlsh:demo> SELECT first_name, last_name, age, height, created_at FROM sasi + ... WHERE first_name = 'M' and age < 30 ALLOW FILTERING; + + first_name | last_name | age | height | created_at +------------+-----------+-----+--------+--------------- + Michael | Kjellman | 26 | 180 | 1442959315021 + +(1 rows) +``` + +#### Suffix Queries + +The next example demonstrates `CONTAINS` mode on the `last_name` +column. By using this mode predicates can search for any strings +containing the search string as a sub-string. In this case the strings +containing "a" or "an". + +``` +cqlsh:demo> SELECT * FROM sasi WHERE last_name = 'a'; + + id | age | created_at | first_name | height | last_name +--------------------------------------+-----+---------------+------------+--------+--------------- + f5dfcabe-de96-4148-9b80-a1c41ed276b4 | 26 | 1442959315021 | Michael | 180 | Kjellman + 96053844-45c3-4f15-b1b7-b02c441d3ee1 | 36 | 1442959315020 | Mikhail | 173 | Stepura + 556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | 1442959315018 | Pavel | 181 | Yaskevich + 8f909e8a-008e-49dd-8d43-1b0df348ed44 | 34 | 1442959315024 | Vijay | 183 | Parthasarathy + 2970da43-e070-41a8-8bcb-35df7a0e608a | 32 | 1442959315022 | Johnny | 175 | Zhang + +(5 rows) + +cqlsh:demo> SELECT * FROM sasi WHERE last_name = 'an'; + + id | age | created_at | first_name | height | last_name +--------------------------------------+-----+---------------+------------+--------+----------- + f5dfcabe-de96-4148-9b80-a1c41ed276b4 | 26 | 1442959315021 | Michael | 180 | Kjellman + 2970da43-e070-41a8-8bcb-35df7a0e608a | 32 | 1442959315022 | Johnny | 175 | Zhang + +(2 rows) +``` + +#### Expressions on Non-Indexed Columns + +SASI also supports filtering on non-indexed columns like `height`. The +expression can only narrow down an existing query using `AND`. + +``` +cqlsh:demo> SELECT * FROM sasi WHERE last_name = 'a' AND height >= 175 ALLOW FILTERING; + + id | age | created_at | first_name | height | last_name +--------------------------------------+-----+---------------+------------+--------+--------------- + f5dfcabe-de96-4148-9b80-a1c41ed276b4 | 26 | 1442959315021 | Michael | 180 | Kjellman + 556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | 1442959315018 | Pavel | 181 | Yaskevich + 8f909e8a-008e-49dd-8d43-1b0df348ed44 | 34 | 1442959315024 | Vijay | 183 | Parthasarathy + 2970da43-e070-41a8-8bcb-35df7a0e608a | 32 | 1442959315022 | Johnny | 175 | Zhang + +(4 rows) +``` + +#### Text Analysis (Tokenization and Stemming) + +Lastly, to demonstrate text analysis an additional column is needed on +the table. Its definition, index, and statements to update rows are shown below. + +``` +cqlsh:demo> ALTER TABLE sasi ADD bio text; +cqlsh:demo> CREATE CUSTOM INDEX ON sasi (bio) USING 'org.apache.cassandra.index.sasi.SASIIndex' + ... WITH OPTIONS = { + ... 'analyzer_class': 'org.apache.cassandra.index.sasi.analyzer.StandardAnalyzer', + ... 'tokenization_enable_stemming': 'true', + ... 'analyzed': 'true', + ... 'tokenization_normalize_lowercase': 'true', + ... 'tokenization_locale': 'en' + ... }; +cqlsh:demo> UPDATE sasi SET bio = 'Software Engineer, who likes distributed systems, doesnt like to argue.' WHERE id = 5770382a-c56f-4f3f-b755-450e24d55217; +cqlsh:demo> UPDATE sasi SET bio = 'Software Engineer, works on the freight distribution at nights and likes arguing' WHERE id = 556ebd54-cbe5-4b75-9aae-bf2a31a24500; +cqlsh:demo> SELECT * FROM sasi; + + id | age | bio | created_at | first_name | height | last_name +--------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+--------------- + f5dfcabe-de96-4148-9b80-a1c41ed276b4 | 26 | null | 1442959315021 | Michael | 180 | Kjellman + 96053844-45c3-4f15-b1b7-b02c441d3ee1 | 36 | null | 1442959315020 | Mikhail | 173 | Stepura + 6b757016-631d-4fdb-ac62-40b127ccfbc7 | 40 | null | 1442959315023 | Jason | 182 | Brown + 556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | Software Engineer, works on the freight distribution at nights and likes arguing | 1442959315018 | Pavel | 181 | Yaskevich + 8f909e8a-008e-49dd-8d43-1b0df348ed44 | 34 | null | 1442959315024 | Vijay | 183 | Parthasarathy + 5770382a-c56f-4f3f-b755-450e24d55217 | 26 | Software Engineer, who likes distributed systems, doesnt like to argue. | 1442959315019 | Jordan | 173 | West + 2970da43-e070-41a8-8bcb-35df7a0e608a | 32 | null | 1442959315022 | Johnny | 175 | Zhang + +(7 rows) +``` + +Index terms and query search strings are stemmed for the `bio` column +because it was configured to use the +[`StandardAnalyzer`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java) +and `analyzed` is set to `true`. The +`tokenization_normalize_lowercase` is similar to the `case_sensitive` +property but for the +[`StandardAnalyzer`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java). These +query demonstrates the stemming applied by [`StandardAnalyzer`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/analyzer/StandardAnalyzer.java). + +``` +cqlsh:demo> SELECT * FROM sasi WHERE bio = 'distributing'; + + id | age | bio | created_at | first_name | height | last_name +--------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+----------- + 556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | Software Engineer, works on the freight distribution at nights and likes arguing | 1442959315018 | Pavel | 181 | Yaskevich + 5770382a-c56f-4f3f-b755-450e24d55217 | 26 | Software Engineer, who likes distributed systems, doesnt like to argue. | 1442959315019 | Jordan | 173 | West + +(2 rows) + +cqlsh:demo> SELECT * FROM sasi WHERE bio = 'they argued'; + + id | age | bio | created_at | first_name | height | last_name +--------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+----------- + 556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | Software Engineer, works on the freight distribution at nights and likes arguing | 1442959315018 | Pavel | 181 | Yaskevich + 5770382a-c56f-4f3f-b755-450e24d55217 | 26 | Software Engineer, who likes distributed systems, doesnt like to argue. | 1442959315019 | Jordan | 173 | West + +(2 rows) + +cqlsh:demo> SELECT * FROM sasi WHERE bio = 'working at the company'; + + id | age | bio | created_at | first_name | height | last_name +--------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+----------- + 556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | Software Engineer, works on the freight distribution at nights and likes arguing | 1442959315018 | Pavel | 181 | Yaskevich + +(1 rows) + +cqlsh:demo> SELECT * FROM sasi WHERE bio = 'soft eng'; + + id | age | bio | created_at | first_name | height | last_name +--------------------------------------+-----+----------------------------------------------------------------------------------+---------------+------------+--------+----------- + 556ebd54-cbe5-4b75-9aae-bf2a31a24500 | 27 | Software Engineer, works on the freight distribution at nights and likes arguing | 1442959315018 | Pavel | 181 | Yaskevich + 5770382a-c56f-4f3f-b755-450e24d55217 | 26 | Software Engineer, who likes distributed systems, doesnt like to argue. | 1442959315019 | Jordan | 173 | West + +(2 rows) +``` + +## Implementation Details + +While SASI, at the surface, is simply an implementation of the +`Index` interface, at its core there are several data +structures and algorithms used to satisfy it. These are described +here. Additionally, the changes internal to Cassandra to support SASIs +integration are described. + +The `Index` interface divides responsibility of the +implementer into two parts: Indexing and Querying. Further, Cassandra +makes it possible to divide those responsibilities into the memory and +disk components. SASI takes advantage of Cassandra's write-once, +immutable, ordered data model to build indexes along with the flushing +of the memtable to disk -- this is the origin of the name "SSTable +Attached Secondary Index". + +The SASI index data structures are built in memory as the SSTable is +being written and they are flushed to disk before the writing of the +SSTable completes. The writing of each index file only requires +sequential writes to disk. In some cases, partial flushes are +performed, and later stitched back together, to reduce memory +usage. These data structures are optimized for this use case. + +Taking advantage of Cassandra's ordered data model, at query time, +candidate indexes are narrowed down for searching minimize the amount +of work done. Searching is then performed using an efficient method +that streams data off disk as needed. + +### Indexing + +Per SSTable, SASI writes an index file for each indexed column. The +data for these files is built in memory using the +[`OnDiskIndexBuilder`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndexBuilder.java). Once +flushed to disk, the data is read using the +[`OnDiskIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java) +class. These are composed of bytes representing indexed terms, +organized for efficient writing or searching respectively. The keys +and values they hold represent tokens and positions in an SSTable and +these are stored per-indexed term in +[`TokenTreeBuilder`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTreeBuilder.java)s +for writing, and +[`TokenTree`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java)s +for querying. These index files are memory mapped after being written +to disk, for quicker access. For indexing data in the memtable SASI +uses its +[`IndexMemtable`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/IndexMemtable.java) +class. + +#### OnDiskIndex(Builder) + +Each +[`OnDiskIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java) +is an instance of a modified +[Suffix Array](https://en.wikipedia.org/wiki/Suffix_array) data +structure. The +[`OnDiskIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java) +is comprised of page-size blocks of sorted terms and pointers to the +terms' associated data, as well as the data itself, stored also in one +or more page-sized blocks. The +[`OnDiskIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java) +is structured as a tree of arrays, where each level describes the +terms in the level below, the final level being the terms +themselves. The `PointerLevel`s and their `PointerBlock`s contain +terms and pointers to other blocks that *end* with those terms. The +`DataLevel`, the final level, and its `DataBlock`s contain terms and +point to the data itself, contained in [`TokenTree`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java)s. + +The terms written to the +[`OnDiskIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java) +vary depending on its "mode": either `PREFIX`, `CONTAINS`, or +`SPARSE`. In the `PREFIX` and `SPARSE` cases terms exact values are +written exactly once per `OnDiskIndex`. For example, a `PREFIX` index +with terms `Jason`, `Jordan`, `Pavel`, all three will be included in +the index. A `CONTAINS` index writes additional terms for each suffix of +each term recursively. Continuing with the example, a `CONTAINS` index +storing the previous terms would also store `ason`, `ordan`, `avel`, +`son`, `rdan`, `vel`, etc. This allows for queries on the suffix of +strings. The `SPARSE` mode differs from `PREFIX` in that for every 64 +blocks of terms a +[`TokenTree`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java) +is built merging all the +[`TokenTree`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java)s +for each term into a single one. This copy of the data is used for +efficient iteration of large ranges of e.g. timestamps. The index +"mode" is configurable per column at index creation time. + +#### TokenTree(Builder) + +The +[`TokenTree`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java) +is an implementation of the well-known +[B+-tree](https://en.wikipedia.org/wiki/B%2B_tree) that has been +modified to optimize for its use-case. In particular, it has been +optimized to associate tokens, longs, with a set of positions in an +SSTable, also longs. Allowing the set of long values accommodates +the possibility of a hash collision in the token, but the data +structure is optimized for the unlikely possibility of such a +collision. + +To optimize for its write-once environment the +[`TokenTreeBuilder`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTreeBuilder.java) +completely loads its interior nodes as the tree is built and it uses +the well-known algorithm optimized for bulk-loading the data +structure. + +[`TokenTree`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/TokenTree.java)s provide the means to iterate a tokens, and file +positions, that match a given term, and to skip forward in that +iteration, an operation used heavily at query time. + +#### IndexMemtable + +The +[`IndexMemtable`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/IndexMemtable.java) +handles indexing the in-memory data held in the memtable. The +[`IndexMemtable`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/IndexMemtable.java) +in turn manages either a +[`TrieMemIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java) +or a +[`SkipListMemIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/SkipListMemIndex.java) +per-column. The choice of which index type is used is data +dependent. The +[`TrieMemIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java) +is used for literal types. `AsciiType` and `UTF8Type` are literal +types by defualt but any column can be configured as a literal type +using the `is_literal` option at index creation time. For non-literal +types the +[`SkipListMemIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/SkipListMemIndex.java) +is used. The +[`TrieMemIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java) +is an implementation that can efficiently support prefix queries on +character-like data. The +[`SkipListMemIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/SkipListMemIndex.java), +conversely, is better suited for Cassandra other data types like +numbers. + +The +[`TrieMemIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java) +is built using either the `ConcurrentRadixTree` or +`ConcurrentSuffixTree` from the `com.goooglecode.concurrenttrees` +package. The choice between the two is made based on the indexing +mode, `PREFIX` or other modes, and `CONTAINS` mode, respectively. + +The +[`SkipListMemIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/memory/SkipListMemIndex.java) +is built on top of `java.util.concurrent.ConcurrentSkipListSet`. + +### Querying + +Responsible for converting the internal `IndexExpression` +representation into SASI's +[`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java) +and +[`Expression`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Expression.java) +tree, optimizing the tree to reduce the amount of work done, and +driving the query itself the +[`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java) +is the work horse of SASI's querying implementation. To efficiently +perform union and intersection operations SASI provides several +iterators similar to Cassandra's `MergeIterator` but tailored +specifically for SASIs use, and with more features. The +[`RangeUnionIterator`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java), +like its name suggests, performs set union over sets of tokens/keys +matching the query, only reading as much data as it needs from each +set to satisfy the query. The +[`RangeIntersectionIterator`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java), +similar to its counterpart, performs set intersection over its data. + +#### QueryPlan + +The +[`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java) +instantiated per search query is at the core of SASIs querying +implementation. Its work can be divided in two stages: analysis and +execution. + +During the analysis phase, +[`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java) +converts from Cassandra's internal representation of +`IndexExpression`s, which has also been modified to support encoding +queries that contain ORs and groupings of expressions using +parentheses (see the +[Cassandra Internal Changes](https://github.com/xedin/sasi#cassandra-internal-changes) +section below for more details). This process produces a tree of +[`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java)s, which in turn may contain [`Expression`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Expression.java)s, all of which +provide an alternative, more efficient, representation of the query. + +During execution the +[`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java) +uses the `DecoratedKey`-generating iterator created from the +[`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java) tree. These keys are read from disk and a final check to +ensure they satisfy the query is made, once again using the +[`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java) tree. At the point the desired amount of matching data has +been found, or there is no more matching data, the result set is +returned to the coordinator through the existing internal components. + +The number of queries (total/failed/timed-out), and their latencies, +are maintined per-table/column family. + +SASI also supports concurrently iterating terms for the same index +accross SSTables. The concurrency factor is controlled by the +`cassandra.search_concurrency_factor` system property. The default is +`1`. + +##### QueryController + +Each +[`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java) +references a +[`QueryController`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryController.java) +used throughout the execution phase. The +[`QueryController`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryController.java) +has two responsibilities: to manage and ensure the proper cleanup of +resources (indexes), and to strictly enforce the time bound for query, +specified by the user via the range slice timeout. All indexes are +accessed via the +[`QueryController`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryController.java) +so that they can be safely released by it later. The +[`QueryController`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryController.java)'s +`checkpoint` function is called in specific places in the execution +path to ensure the time-bound is enforced. + +##### QueryPlan Optimizations + +While in the analysis phase, the +[`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java) +performs several potential optimizations to the query. The goal of +these optimizations is to reduce the amount of work performed during +the execution phase. + +The simplest optimization performed is compacting multiple expressions +joined by logical intersection (`AND`) into a single [`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java) with +three or more [`Expression`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Expression.java)s. For example, the query `WHERE age < 100 AND +fname = 'p*' AND first_name != 'pa*' AND age > 21` would, +without modification, have the following tree: + + âââââââââ + ââââââââââ AND ââââââââ + â âââââââââ â + â¼ â¼ + âââââââââ ââââââââââââ + âââââââ AND âââââââ âage < 100 â + â âââââââââ â ââââââââââââ + â¼ â¼ + ââââââââââââ âââââââââ + â fname=p* â âââ AND âââââ + ââââââââââââ â âââââââââ â + â¼ â¼ + ââââââââââââ ââââââââââââ + âfname!=pa*â â age > 21 â + ââââââââââââ ââââââââââââ + +[`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java) +will remove the redundant right branch whose root is the final `AND` +and has leaves `fname != pa*` and `age > 21`. These [`Expression`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Expression.java)s will +be compacted into the parent `AND`, a safe operation due to `AND` +being associative and commutative. The resulting tree looks like the +following: + + âââââââââ + ââââââââââ AND ââââââââ + â âââââââââ â + â¼ â¼ + âââââââââ ââââââââââââ + âââââââââââââ AND ââââââââââ âage < 100 â + â âââââââââ â ââââââââââââ + â¼ â â¼ + ââââââââââââ â ââââââââââââ + â fname=p* â â¼ â age > 21 â + ââââââââââââ ââââââââââââ ââââââââââââ + âfname!=pa*â + ââââââââââââ + +When excluding results from the result set, using `!=`, the +[`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java) +determines the best method for handling it. For range queries, for +example, it may be optimal to divide the range into multiple parts +with a hole for the exclusion. For string queries, such as this one, +it is more optimal, however, to simply note which data to skip, or +exclude, while scanning the index. Following this optimization the +tree looks like this: + + âââââââââ + ââââââââââ AND ââââââââ + â âââââââââ â + â¼ â¼ + âââââââââ ââââââââââââ + âââââââââ AND ââââââââââ âage < 100 â + â âââââââââ â ââââââââââââ + â¼ â¼ + ââââââââââââââââââââ ââââââââââââ + â fname=p* â â age > 21 â + â exclusions=[pa*] â ââââââââââââ + ââââââââââââââââââââ + +The last type of optimization applied, for this query, is to merge +range expressions across branches of the tree -- without modifying the +meaning of the query, of course. In this case, because the query +contains all `AND`s the `age` expressions can be collapsed. Along with +this optimization, the initial collapsing of unneeded `AND`s can also +be applied once more to result in this final tree using to execute the +query: + + âââââââââ + ââââââââ AND âââââââââ + â âââââââââ â + â¼ â¼ + ââââââââââââââââââââ ââââââââââââââââââ + â fname=p* â â 21 < age < 100 â + â exclusions=[pa*] â ââââââââââââââââââ + ââââââââââââââââââââ + +#### Operations and Expressions + +As discussed, the +[`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java) +optimizes a tree represented by +[`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java)s +as interior nodes, and +[`Expression`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Expression.java)s +as leaves. The +[`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java) +class, more specifically, can have zero, one, or two +[`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java)s +as children and an unlimited number of expressions. The iterators used +to perform the queries, discussed below in the +"Range(Union|Intersection)Iterator" section, implement the necessary +logic to merge results transparently regardless of the +[`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java)s +children. + +Besides participating in the optimizations performed by the +[`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java), +[`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java) +is also responsible for taking a row that has been returned by the +query and making a final validation that it in fact does match. This +`satisfiesBy` operation is performed recursively from the root of the +[`Operation`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java) +tree for a given query. These checks are performed directly on the +data in a given row. For more details on how `satisfiesBy` works see +the documentation +[in the code](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/Operation.java#L87-L123). + +#### Range(Union|Intersection)Iterator + +The abstract `RangeIterator` class provides a unified interface over +the two main operations performed by SASI at various layers in the +execution path: set intersection and union. These operations are +performed in a iterated, or "streaming", fashion to prevent unneeded +reads of elements from either set. In both the intersection and union +cases the algorithms take advantage of the data being pre-sorted using +the same sort order, e.g. term or token order. + +The +[`RangeUnionIterator`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java) +performs the "Merge-Join" portion of the +[Sort-Merge-Join](https://en.wikipedia.org/wiki/Sort-merge_join) +algorithm, with the properties of an outer-join, or union. It is +implemented with several optimizations to improve its performance over +a large number of iterators -- sets to union. Specifically, the +iterator exploits the likely case of the data having many sub-groups +of overlapping ranges and the unlikely case that all ranges will +overlap each other. For more details see the +[javadoc](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java#L9-L21). + +The +[`RangeIntersectionIterator`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java) +itself is not a subclass of `RangeIterator`. It is a container for +several classes, one of which, `AbstractIntersectionIterator`, +sub-classes `RangeIterator`. SASI supports two methods of performing +the intersection operation, and the ability to be adaptive in choosing +between them based on some properties of the data. + +`BounceIntersectionIterator`, and the `BOUNCE` strategy, works like +the +[`RangeUnionIterator`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeUnionIterator.java) +in that it performs a "Merge-Join", however, its nature is similar to +a inner-join, where like values are merged by a data-specific merge +function (e.g. merging two tokens in a list to lookup in a SSTable +later). See the +[javadoc](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java#L88-L101) +for more details on its implementation. + +`LookupIntersectionIterator`, and the `LOOKUP` strategy, performs a +different operation, more similar to a lookup in an associative data +structure, or "hash lookup" in database terminology. Once again, +details on the implementation can be found in the +[javadoc](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/utils/RangeIntersectionIterator.java#L199-L208). + +The choice between the two iterators, or the `ADAPTIVE` strategy, is +based upon the ratio of data set sizes of the minimum and maximum +range of the sets being intersected. If the number of the elements in +minimum range divided by the number of elements is the maximum range +is less than or equal to `0.01`, then the `ADAPTIVE` strategy chooses +the `LookupIntersectionIterator`, otherwise the +`BounceIntersectionIterator` is chosen. + +### The SASIIndex Class + +The above components are glued together by the +[`SASIIndex`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasiIndex.java) +class which implements `Index`, and is instantiated +per-table containing SASI indexes. It manages all indexes for a table +via the +[`sasi.conf.DataTracker`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/conf/DataTracker.java) +and +[`sasi.conf.view.View`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/conf/view/View.java) +components, controls writing of all indexes for an SSTable via its +[`PerSSTableIndexWriter`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/disk/PerSSTableIndexWriter.java), and initiates searches with +`Indexer`. These classes glue the previously +mentioned indexing components together with Cassandra's SSTable +life-cycle ensuring indexes are not only written when Memtable's flush +but also as SSTable's are compacted. For querying, the +`Indexer` does little but defer to +[`QueryPlan`](https://github.com/apache/cassandra/blob/trunk/src/java/org/apache/cassandra/index/sasi/plan/QueryPlan.java) +and update e.g. latency metrics exposed by SASI. + +### Cassandra Internal Changes + +To support the above changes and integrate them into Cassandra a few +minor internal changes were made to Cassandra itself. These are +described here. + +#### SSTable Write Life-cycle Notifications + +The `SSTableFlushObserver` is an observer pattern-like interface, +whose sub-classes can register to be notified about events in the +life-cycle of writing out a SSTable. Sub-classes can be notified when a +flush begins and ends, as well as when each next row is about to be +written, and each next column. SASI's `PerSSTableIndexWriter`, +discussed above, is the only current subclass. + +### Limitations and Caveats + +The following are items that can be addressed in future updates but are not +available in this repository or are not currently implemented. + +* The cluster must be configured to use a partitioner that produces + `LongToken`s, e.g. `Murmur3Partitioner`. Other existing partitioners which + don't produce LongToken e.g. `ByteOrderedPartitioner` and `RandomPartitioner` + will not work with SASI. +* `ALLOW FILTERING`, the requirement of at least one indexes `=` + expression, and lack of `LIKE` limit SASIs + feature-set. Modifications to the grammar to allow `Index` + implementations to enumerate its supported features would allow SASI + to expose more features without need to support them in other + implementations. +* Not Equals and OR support have been removed in this release while + changes are made to Cassandra itself to support them. + +### Contributors + +* [Pavel Yaskevich](https://github.com/xedin) +* [Jordan West](https://github.com/jrwest) +* [Michael Kjellman](https://github.com/mkjellman) +* [Jason Brown](https://github.com/jasobrown) +* [Mikhail Stepura](https://github.com/mishail) http://git-wip-us.apache.org/repos/asf/cassandra/blob/72790dc8/lib/concurrent-trees-2.4.0.jar ---------------------------------------------------------------------- diff --git a/lib/concurrent-trees-2.4.0.jar b/lib/concurrent-trees-2.4.0.jar new file mode 100644 index 0000000..9c488fe Binary files /dev/null and b/lib/concurrent-trees-2.4.0.jar differ http://git-wip-us.apache.org/repos/asf/cassandra/blob/72790dc8/lib/hppc-0.5.4.jar ---------------------------------------------------------------------- diff --git a/lib/hppc-0.5.4.jar b/lib/hppc-0.5.4.jar new file mode 100644 index 0000000..d84b83b Binary files /dev/null and b/lib/hppc-0.5.4.jar differ http://git-wip-us.apache.org/repos/asf/cassandra/blob/72790dc8/lib/jflex-1.6.0.jar ---------------------------------------------------------------------- diff --git a/lib/jflex-1.6.0.jar b/lib/jflex-1.6.0.jar new file mode 100644 index 0000000..550e446 Binary files /dev/null and b/lib/jflex-1.6.0.jar differ http://git-wip-us.apache.org/repos/asf/cassandra/blob/72790dc8/lib/licenses/concurrent-trees-2.4.0.txt ---------------------------------------------------------------------- diff --git a/lib/licenses/concurrent-trees-2.4.0.txt b/lib/licenses/concurrent-trees-2.4.0.txt new file mode 100644 index 0000000..50086f8 --- /dev/null +++ b/lib/licenses/concurrent-trees-2.4.0.txt @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file http://git-wip-us.apache.org/repos/asf/cassandra/blob/72790dc8/lib/licenses/hppc-0.5.4.txt ---------------------------------------------------------------------- diff --git a/lib/licenses/hppc-0.5.4.txt b/lib/licenses/hppc-0.5.4.txt new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/lib/licenses/hppc-0.5.4.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License.