Repository: cassandra
Updated Branches:
  refs/heads/trunk e375a9b1b -> 2ca2fffee


Support EQ/PREFIX queries in SASI CONTAINS mode without tokenization

patch by jrwest and xedin; reviewed by xedin for CASSANDRA-11434


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/2ca2fffe
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/2ca2fffe
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/2ca2fffe

Branch: refs/heads/trunk
Commit: 2ca2fffee80c75e3d1aa38badbefbd3b35851901
Parents: e375a9b
Author: Jordan West <jorda...@gmail.com>
Authored: Thu Mar 24 23:32:45 2016 -0700
Committer: Pavel Yaskevich <xe...@apache.org>
Committed: Sun Apr 3 00:27:54 2016 -0700

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../cassandra/index/sasi/SSTableIndex.java      | 11 +++++
 .../org/apache/cassandra/index/sasi/Term.java   | 24 +++++++++--
 .../cassandra/index/sasi/TermIterator.java      |  9 ++++
 .../cassandra/index/sasi/conf/ColumnIndex.java  |  6 ++-
 .../cassandra/index/sasi/disk/Descriptor.java   |  2 +-
 .../cassandra/index/sasi/disk/OnDiskIndex.java  | 23 ++++++++--
 .../index/sasi/disk/OnDiskIndexBuilder.java     | 36 +++++++++++-----
 .../index/sasi/memory/TrieMemIndex.java         |  1 +
 .../cassandra/index/sasi/plan/Expression.java   | 10 +++++
 .../cassandra/index/sasi/sa/IndexedTerm.java    | 43 +++++++++++++++++++
 .../cassandra/index/sasi/sa/IntegralSA.java     |  5 ++-
 .../cassandra/index/sasi/sa/SuffixSA.java       | 41 ++++++++++++------
 .../cassandra/index/sasi/sa/TermIterator.java   |  2 +-
 .../index/sasi/utils/CombinedTerm.java          |  5 +++
 .../index/sasi/utils/CombinedTermIterator.java  |  5 ++-
 .../cassandra/index/sasi/SASIIndexTest.java     | 45 ++++++++++++++++----
 .../index/sasi/disk/OnDiskIndexTest.java        |  9 +++-
 18 files changed, 230 insertions(+), 48 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/2ca2fffe/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index a01196f..e5ee3d8 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 3.6
+ * Support EQ/PREFIX queries in SASI CONTAINS mode without tokenization (CASSANDRA-11434)
  * Support LIKE operator in prepared statements (CASSANDRA-11456)
  * Add a command to see if a Materialized View has finished building (CASSANDRA-9967)
  * Log endpoint and port associated with streaming operation (CASSANDRA-8777)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2ca2fffe/src/java/org/apache/cassandra/index/sasi/SSTableIndex.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/SSTableIndex.java 
b/src/java/org/apache/cassandra/index/sasi/SSTableIndex.java
index 7b65232..c67c39c 100644
--- a/src/java/org/apache/cassandra/index/sasi/SSTableIndex.java
+++ b/src/java/org/apache/cassandra/index/sasi/SSTableIndex.java
@@ -27,6 +27,7 @@ import org.apache.cassandra.db.DecoratedKey;
 import org.apache.cassandra.db.marshal.AbstractType;
 import org.apache.cassandra.index.sasi.conf.ColumnIndex;
 import org.apache.cassandra.index.sasi.disk.OnDiskIndex;
+import org.apache.cassandra.index.sasi.disk.OnDiskIndexBuilder;
 import org.apache.cassandra.index.sasi.disk.Token;
 import org.apache.cassandra.index.sasi.plan.Expression;
 import org.apache.cassandra.index.sasi.utils.RangeIterator;
@@ -67,6 +68,16 @@ public class SSTableIndex
         this.index = new OnDiskIndex(indexFile, validator, new 
DecoratedKeyFetcher(sstable));
     }
 
+    public OnDiskIndexBuilder.Mode mode()
+    {
+        return index.mode();
+    }
+
+    public boolean hasMarkedPartials()
+    {
+        return index.hasMarkedPartials();
+    }
+
     public ByteBuffer minTerm()
     {
         return index.minTerm();

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2ca2fffe/src/java/org/apache/cassandra/index/sasi/Term.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/Term.java 
b/src/java/org/apache/cassandra/index/sasi/Term.java
index 8e8ceb2..8f42d58 100644
--- a/src/java/org/apache/cassandra/index/sasi/Term.java
+++ b/src/java/org/apache/cassandra/index/sasi/Term.java
@@ -23,30 +23,41 @@ import 
org.apache.cassandra.index.sasi.disk.OnDiskIndexBuilder.TermSize;
 import org.apache.cassandra.index.sasi.utils.MappedBuffer;
 import org.apache.cassandra.db.marshal.AbstractType;
 
+import static 
org.apache.cassandra.index.sasi.disk.OnDiskIndexBuilder.IS_PARTIAL_BIT;
+
 public class Term
 {
     protected final MappedBuffer content;
     protected final TermSize termSize;
 
+    private final boolean hasMarkedPartials;
 
-    public Term(MappedBuffer content, TermSize size)
+    public Term(MappedBuffer content, TermSize size, boolean hasMarkedPartials)
     {
         this.content = content;
         this.termSize = size;
+        this.hasMarkedPartials = hasMarkedPartials;
     }
 
     public ByteBuffer getTerm()
     {
         long offset = termSize.isConstant() ? content.position() : 
content.position() + 2;
-        int  length = termSize.isConstant() ? termSize.size : 
content.getShort(content.position());
+        int  length = termSize.isConstant() ? termSize.size : 
readLength(content.position());
 
         return content.getPageRegion(offset, length);
     }
 
+    public boolean isPartial()
+    {
+        return !termSize.isConstant()
+               && hasMarkedPartials
+               && (content.getShort(content.position()) & (1 << 
IS_PARTIAL_BIT)) != 0;
+    }
+
     public long getDataOffset()
     {
         long position = content.position();
-        return position + (termSize.isConstant() ? termSize.size : 2 + 
content.getShort(position));
+        return position + (termSize.isConstant() ? termSize.size : 2 + 
readLength(position));
     }
 
     public int compareTo(AbstractType<?> comparator, ByteBuffer query)
@@ -58,8 +69,13 @@ public class Term
     {
         long position = content.position();
         int padding = termSize.isConstant() ? 0 : 2;
-        int len = termSize.isConstant() ? termSize.size : 
content.getShort(position);
+        int len = termSize.isConstant() ? termSize.size : readLength(position);
 
         return content.comparePageTo(position + padding, checkFully ? len : 
Math.min(len, query.remaining()), comparator, query);
     }
+
+    private short readLength(long position)
+    {
+        return (short) (content.getShort(position) & ~(1 << IS_PARTIAL_BIT));
+    }
 }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2ca2fffe/src/java/org/apache/cassandra/index/sasi/TermIterator.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/TermIterator.java 
b/src/java/org/apache/cassandra/index/sasi/TermIterator.java
index d2db88c..5b08a56 100644
--- a/src/java/org/apache/cassandra/index/sasi/TermIterator.java
+++ b/src/java/org/apache/cassandra/index/sasi/TermIterator.java
@@ -24,6 +24,7 @@ import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
 
 import org.apache.cassandra.config.DatabaseDescriptor;
+import org.apache.cassandra.index.sasi.disk.OnDiskIndexBuilder;
 import org.apache.cassandra.index.sasi.disk.Token;
 import org.apache.cassandra.index.sasi.plan.Expression;
 import org.apache.cassandra.index.sasi.utils.RangeUnionIterator;
@@ -101,6 +102,14 @@ public class TermIterator extends RangeIterator<Long, 
Token>
 
             for (final SSTableIndex index : perSSTableIndexes)
             {
+                if (e.getOp() == Expression.Op.PREFIX &&
+                    index.mode() == OnDiskIndexBuilder.Mode.CONTAINS && 
!index.hasMarkedPartials())
+                    throw new UnsupportedOperationException(String.format("The 
index %s has not yet been upgraded " +
+                                                                          "to 
support prefix queries in CONTAINS mode. " +
+                                                                          
"Wait for compaction or rebuild the index.",
+                                                                          
index.getPath()));
+
+
                 if (!index.reference())
                 {
                     latch.countDown();

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2ca2fffe/src/java/org/apache/cassandra/index/sasi/conf/ColumnIndex.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/conf/ColumnIndex.java 
b/src/java/org/apache/cassandra/index/sasi/conf/ColumnIndex.java
index 89129e7..8ee94d1 100644
--- a/src/java/org/apache/cassandra/index/sasi/conf/ColumnIndex.java
+++ b/src/java/org/apache/cassandra/index/sasi/conf/ColumnIndex.java
@@ -39,6 +39,7 @@ import org.apache.cassandra.db.rows.Cell;
 import org.apache.cassandra.db.rows.Row;
 import org.apache.cassandra.index.sasi.analyzer.AbstractAnalyzer;
 import org.apache.cassandra.index.sasi.conf.view.View;
+import org.apache.cassandra.index.sasi.disk.OnDiskIndexBuilder;
 import org.apache.cassandra.index.sasi.disk.Token;
 import org.apache.cassandra.index.sasi.memory.IndexMemtable;
 import org.apache.cassandra.index.sasi.plan.Expression;
@@ -217,8 +218,9 @@ public class ColumnIndex
 
         Op operator = Op.valueOf(op);
         return !(isTokenized && operator == Op.EQ) // EQ is only applicable to 
non-tokenized indexes
-            && !(isLiteral() && operator == Op.RANGE) // RANGE only applicable 
to indexes non-literal indexes
-            && mode.supports(operator); // for all other cases let's refer to 
index itself
+               && !(isTokenized && mode.mode == 
OnDiskIndexBuilder.Mode.CONTAINS && operator == Op.PREFIX) // PREFIX not 
supported on tokenized CONTAINS mode indexes
+               && !(isLiteral() && operator == Op.RANGE) // RANGE only 
applicable to indexes non-literal indexes
+               && mode.supports(operator); // for all other cases let's refer 
to index itself
 
     }
 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2ca2fffe/src/java/org/apache/cassandra/index/sasi/disk/Descriptor.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/disk/Descriptor.java 
b/src/java/org/apache/cassandra/index/sasi/disk/Descriptor.java
index a719f50..3aa6f14 100644
--- a/src/java/org/apache/cassandra/index/sasi/disk/Descriptor.java
+++ b/src/java/org/apache/cassandra/index/sasi/disk/Descriptor.java
@@ -18,7 +18,7 @@
 package org.apache.cassandra.index.sasi.disk;
 
 /**
- * Object descriptor for SSTableAttachedSecondaryIndex files. Similar to, and 
based upon, the sstable descriptor.
+ * Object descriptor for SASIIndex files. Similar to, and based upon, the 
sstable descriptor.
  */
 public class Descriptor
 {

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2ca2fffe/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java 
b/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java
index ef487f1..80092ef 100644
--- a/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java
+++ b/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndex.java
@@ -104,6 +104,7 @@ public class OnDiskIndex implements 
Iterable<OnDiskIndex.DataTerm>, Closeable
     protected final AbstractType<?> comparator;
     protected final MappedBuffer indexFile;
     protected final long indexSize;
+    protected final boolean hasMarkedPartials;
 
     protected final Function<Long, DecoratedKey> keyFetcher;
 
@@ -138,6 +139,7 @@ public class OnDiskIndex implements 
Iterable<OnDiskIndex.DataTerm>, Closeable
             maxKey = ByteBufferUtil.readWithShortLength(backingFile);
 
             mode = OnDiskIndexBuilder.Mode.mode(backingFile.readUTF());
+            hasMarkedPartials = backingFile.readBoolean();
 
             indexSize = backingFile.length();
             indexFile = new MappedBuffer(new ChannelProxy(indexPath, 
backingFile.getChannel()));
@@ -167,6 +169,16 @@ public class OnDiskIndex implements 
Iterable<OnDiskIndex.DataTerm>, Closeable
         }
     }
 
+    public boolean hasMarkedPartials()
+    {
+        return hasMarkedPartials;
+    }
+
+    public OnDiskIndexBuilder.Mode mode()
+    {
+        return mode;
+    }
+
     public ByteBuffer minTerm()
     {
         return minTerm;
@@ -209,6 +221,9 @@ public class OnDiskIndex implements 
Iterable<OnDiskIndex.DataTerm>, Closeable
     {
         assert mode.supports(exp.getOp());
 
+        if (exp.getOp() == Expression.Op.PREFIX && mode == 
OnDiskIndexBuilder.Mode.CONTAINS && !hasMarkedPartials)
+            throw new UnsupportedOperationException("prefix queries in 
CONTAINS mode are not supported by this index");
+
         // optimization in case single term is requested from index
         // we don't really need to build additional union iterator
         if (exp.getOp() == Op.EQ)
@@ -602,7 +617,7 @@ public class OnDiskIndex implements 
Iterable<OnDiskIndex.DataTerm>, Closeable
 
         protected PointerTerm cast(MappedBuffer data)
         {
-            return new PointerTerm(data, termSize);
+            return new PointerTerm(data, termSize, hasMarkedPartials);
         }
     }
 
@@ -612,7 +627,7 @@ public class OnDiskIndex implements 
Iterable<OnDiskIndex.DataTerm>, Closeable
 
         protected DataTerm(MappedBuffer content, OnDiskIndexBuilder.TermSize 
size, TokenTree perBlockIndex)
         {
-            super(content, size);
+            super(content, size, hasMarkedPartials);
             this.perBlockIndex = perBlockIndex;
         }
 
@@ -660,9 +675,9 @@ public class OnDiskIndex implements 
Iterable<OnDiskIndex.DataTerm>, Closeable
 
     protected static class PointerTerm extends Term
     {
-        public PointerTerm(MappedBuffer content, OnDiskIndexBuilder.TermSize 
size)
+        public PointerTerm(MappedBuffer content, OnDiskIndexBuilder.TermSize 
size, boolean hasMarkedPartials)
         {
-            super(content, size);
+            super(content, size, hasMarkedPartials);
         }
 
         public int getBlock()

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2ca2fffe/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndexBuilder.java
----------------------------------------------------------------------
diff --git 
a/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndexBuilder.java 
b/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndexBuilder.java
index 888dc63..8acbb05 100644
--- a/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndexBuilder.java
+++ b/src/java/org/apache/cassandra/index/sasi/disk/OnDiskIndexBuilder.java
@@ -24,6 +24,7 @@ import java.util.*;
 
 import org.apache.cassandra.db.DecoratedKey;
 import org.apache.cassandra.index.sasi.plan.Expression.Op;
+import org.apache.cassandra.index.sasi.sa.IndexedTerm;
 import org.apache.cassandra.index.sasi.sa.IntegralSA;
 import org.apache.cassandra.index.sasi.sa.SA;
 import org.apache.cassandra.index.sasi.sa.TermIterator;
@@ -51,7 +52,7 @@ public class OnDiskIndexBuilder
     public enum Mode
     {
         PREFIX(EnumSet.of(Op.EQ, Op.MATCH, Op.PREFIX, Op.NOT_EQ, Op.RANGE)),
-        CONTAINS(EnumSet.of(Op.MATCH, Op.CONTAINS, Op.SUFFIX, Op.NOT_EQ)),
+        CONTAINS(EnumSet.of(Op.EQ, Op.MATCH, Op.CONTAINS, Op.PREFIX, 
Op.SUFFIX, Op.NOT_EQ)),
         SPARSE(EnumSet.of(Op.EQ, Op.NOT_EQ, Op.RANGE));
 
         Set<Op> supportedOps;
@@ -128,6 +129,7 @@ public class OnDiskIndexBuilder
     public static final int BLOCK_SIZE = 4096;
     public static final int MAX_TERM_SIZE = 1024;
     public static final int SUPER_BLOCK_SIZE = 64;
+    public static final int IS_PARTIAL_BIT = 15;
 
     private final List<MutableLevel<InMemoryPointerTerm>> levels = new 
ArrayList<>();
     private MutableLevel<InMemoryDataTerm> dataLevel;
@@ -138,17 +140,24 @@ public class OnDiskIndexBuilder
 
     private final Map<ByteBuffer, TokenTreeBuilder> terms;
     private final Mode mode;
+    private final boolean marksPartials;
 
     private ByteBuffer minKey, maxKey;
     private long estimatedBytes;
 
     public OnDiskIndexBuilder(AbstractType<?> keyComparator, AbstractType<?> 
comparator, Mode mode)
     {
+        this(keyComparator, comparator, mode, true);
+    }
+
+    public OnDiskIndexBuilder(AbstractType<?> keyComparator, AbstractType<?> 
comparator, Mode mode, boolean marksPartials)
+    {
         this.keyComparator = keyComparator;
         this.termComparator = comparator;
         this.terms = new HashMap<>();
         this.termSize = TermSize.sizeOf(comparator);
         this.mode = mode;
+        this.marksPartials = marksPartials;
     }
 
     public OnDiskIndexBuilder add(ByteBuffer term, DecoratedKey key, long 
keyPosition)
@@ -269,6 +278,7 @@ public class OnDiskIndexBuilder
             ByteBufferUtil.writeWithShortLength(range.right, out);
 
             out.writeUTF(mode.toString());
+            out.writeBoolean(marksPartials);
 
             out.skipBytes((int) (BLOCK_SIZE - out.position()));
 
@@ -276,7 +286,7 @@ public class OnDiskIndexBuilder
                                             : new MutableLevel<>(out, new 
MutableDataBlock(termComparator, mode));
             while (terms.hasNext())
             {
-                Pair<ByteBuffer, TokenTreeBuilder> term = terms.next();
+                Pair<IndexedTerm, TokenTreeBuilder> term = terms.next();
                 addTerm(new InMemoryDataTerm(term.left, term.right), out);
             }
 
@@ -333,24 +343,30 @@ public class OnDiskIndexBuilder
 
     private class InMemoryTerm
     {
-        protected final ByteBuffer term;
+        protected final IndexedTerm term;
 
-        public InMemoryTerm(ByteBuffer term)
+        public InMemoryTerm(IndexedTerm term)
         {
             this.term = term;
         }
 
         public int serializedSize()
         {
-            return (termSize.isConstant() ? 0 : 2) + term.remaining();
+            return (termSize.isConstant() ? 0 : 2) + 
term.getBytes().remaining();
         }
 
         public void serialize(DataOutputPlus out) throws IOException
         {
             if (termSize.isConstant())
-                out.write(term);
+            {
+                out.write(term.getBytes());
+            }
             else
-                ByteBufferUtil.writeWithShortLength(term, out);
+            {
+                out.writeShort(term.getBytes().remaining() | ((marksPartials 
&& term.isPartial() ? 1 : 0) << IS_PARTIAL_BIT));
+                out.write(term.getBytes());
+            }
+
         }
     }
 
@@ -358,7 +374,7 @@ public class OnDiskIndexBuilder
     {
         protected final int blockCnt;
 
-        public InMemoryPointerTerm(ByteBuffer term, int blockCnt)
+        public InMemoryPointerTerm(IndexedTerm term, int blockCnt)
         {
             super(term);
             this.blockCnt = blockCnt;
@@ -380,7 +396,7 @@ public class OnDiskIndexBuilder
     {
         private final TokenTreeBuilder keys;
 
-        public InMemoryDataTerm(ByteBuffer term, TokenTreeBuilder keys)
+        public InMemoryDataTerm(IndexedTerm term, TokenTreeBuilder keys)
         {
             super(term);
             this.keys = keys;
@@ -577,7 +593,7 @@ public class OnDiskIndexBuilder
             {
                 if (keys.getTokenCount() > MAX_KEYS_SPARSE)
                     throw new IOException(String.format("Term - '%s' belongs 
to more than %d keys in %s mode, which is not allowed.",
-                                                        
comparator.getString(term.term), MAX_KEYS_SPARSE, mode.name()));
+                                                        
comparator.getString(term.term.getBytes()), MAX_KEYS_SPARSE, mode.name()));
 
                 writeTerm(term, keys);
             }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2ca2fffe/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java 
b/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java
index 7c72bb7..ca60ac5 100644
--- a/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java
+++ b/src/java/org/apache/cassandra/index/sasi/memory/TrieMemIndex.java
@@ -228,6 +228,7 @@ public class TrieMemIndex extends MemIndex
                 case SUFFIX:
                     return trie.getValuesForKeysEndingWith(value);
 
+                case PREFIX:
                 case CONTAINS:
                     return trie.getValuesForKeysContaining(value);
 

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2ca2fffe/src/java/org/apache/cassandra/index/sasi/plan/Expression.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/plan/Expression.java 
b/src/java/org/apache/cassandra/index/sasi/plan/Expression.java
index f2fd02a..ce420d1 100644
--- a/src/java/org/apache/cassandra/index/sasi/plan/Expression.java
+++ b/src/java/org/apache/cassandra/index/sasi/plan/Expression.java
@@ -322,6 +322,9 @@ public class Expression
         if (!hasLower())
             return true;
 
+        if (nonMatchingPartial(term))
+            return false;
+
         int cmp = term.compareTo(validator, lower.value, false);
         return cmp > 0 || cmp == 0 && lower.inclusive;
     }
@@ -331,6 +334,9 @@ public class Expression
         if (!hasUpper())
             return true;
 
+        if (nonMatchingPartial(term))
+            return false;
+
         int cmp = term.compareTo(validator, upper.value, false);
         return cmp < 0 || cmp == 0 && upper.inclusive;
     }
@@ -379,6 +385,10 @@ public class Expression
                 && exclusions.equals(o.exclusions);
     }
 
+    private boolean nonMatchingPartial(OnDiskIndex.DataTerm term)
+    {
+        return term.isPartial() && operation == Op.PREFIX;
+    }
 
     public static class Bound
     {

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2ca2fffe/src/java/org/apache/cassandra/index/sasi/sa/IndexedTerm.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/sa/IndexedTerm.java 
b/src/java/org/apache/cassandra/index/sasi/sa/IndexedTerm.java
new file mode 100644
index 0000000..8e27134
--- /dev/null
+++ b/src/java/org/apache/cassandra/index/sasi/sa/IndexedTerm.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.index.sasi.sa;
+
+import java.nio.ByteBuffer;
+
+public class IndexedTerm
+{
+    private final ByteBuffer term;
+    private final boolean isPartial;
+
+    public IndexedTerm(ByteBuffer term, boolean isPartial)
+    {
+        this.term = term;
+        this.isPartial = isPartial;
+    }
+
+    public ByteBuffer getBytes()
+    {
+        return term;
+    }
+
+    public boolean isPartial()
+    {
+        return isPartial;
+    }
+}

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2ca2fffe/src/java/org/apache/cassandra/index/sasi/sa/IntegralSA.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/sa/IntegralSA.java 
b/src/java/org/apache/cassandra/index/sasi/sa/IntegralSA.java
index 8356585..e3d591f 100644
--- a/src/java/org/apache/cassandra/index/sasi/sa/IntegralSA.java
+++ b/src/java/org/apache/cassandra/index/sasi/sa/IntegralSA.java
@@ -22,6 +22,7 @@ import java.util.Collections;
 import java.util.Comparator;
 import java.util.Iterator;
 
+import org.apache.cassandra.index.Index;
 import org.apache.cassandra.index.sasi.disk.OnDiskIndexBuilder;
 import org.apache.cassandra.index.sasi.disk.TokenTreeBuilder;
 import org.apache.cassandra.db.marshal.AbstractType;
@@ -72,13 +73,13 @@ public class IntegralSA extends SA<ByteBuffer>
             return terms.get(terms.size() - 1).getTerm();
         }
 
-        protected Pair<ByteBuffer, TokenTreeBuilder> computeNext()
+        protected Pair<IndexedTerm, TokenTreeBuilder> computeNext()
         {
             if (!termIterator.hasNext())
                 return endOfData();
 
             Term<ByteBuffer> term = termIterator.next();
-            return Pair.create(term.getTerm(), term.getTokens().finish());
+            return Pair.create(new IndexedTerm(term.getTerm(), false), 
term.getTokens().finish());
         }
     }
 }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2ca2fffe/src/java/org/apache/cassandra/index/sasi/sa/SuffixSA.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/sa/SuffixSA.java 
b/src/java/org/apache/cassandra/index/sasi/sa/SuffixSA.java
index 592299e..59c50b4 100644
--- a/src/java/org/apache/cassandra/index/sasi/sa/SuffixSA.java
+++ b/src/java/org/apache/cassandra/index/sasi/sa/SuffixSA.java
@@ -48,10 +48,13 @@ public class SuffixSA extends SA<CharBuffer>
 
     private class SASuffixIterator extends TermIterator
     {
+
+        private static final int COMPLETE_BIT = 31;
+
         private final long[] suffixes;
 
         private int current = 0;
-        private ByteBuffer lastProcessedSuffix;
+        private IndexedTerm lastProcessedSuffix;
         private TokenTreeBuilder container;
 
         public SASuffixIterator()
@@ -61,43 +64,55 @@ public class SuffixSA extends SA<CharBuffer>
             suffixes = new long[charCount];
 
             long termIndex = -1, currentTermLength = -1;
+            boolean isComplete = false;
             for (int i = 0; i < charCount; i++)
             {
                 if (i >= currentTermLength || currentTermLength == -1)
                 {
                     Term currentTerm = terms.get((int) ++termIndex);
                     currentTermLength = currentTerm.getPosition() + 
currentTerm.length();
+                    isComplete = true;
                 }
 
                 suffixes[i] = (termIndex << 32) | i;
+                if (isComplete)
+                    suffixes[i] |= (1L << COMPLETE_BIT);
+
+                isComplete = false;
             }
 
             Primitive.sort(suffixes, (a, b) -> {
                 Term aTerm = terms.get((int) (a >>> 32));
                 Term bTerm = terms.get((int) (b >>> 32));
-                return comparator.compare(aTerm.getSuffix(((int) a) - 
aTerm.getPosition()),
-                                          bTerm.getSuffix(((int) b) - 
bTerm.getPosition()));
+                return comparator.compare(aTerm.getSuffix(clearCompleteBit(a) 
- aTerm.getPosition()),
+                                          bTerm.getSuffix(clearCompleteBit(b) 
- bTerm.getPosition()));
             });
         }
 
-        private Pair<ByteBuffer, TokenTreeBuilder> suffixAt(int position)
+        private int clearCompleteBit(long value)
+        {
+            return (int) (value & ~(1L << COMPLETE_BIT));
+        }
+
+        private Pair<IndexedTerm, TokenTreeBuilder> suffixAt(int position)
         {
             long index = suffixes[position];
             Term term = terms.get((int) (index >>> 32));
-            return Pair.create(term.getSuffix(((int) index) - 
term.getPosition()), term.getTokens());
+            boolean isPartitial = (index & ((long) 1 << 31)) == 0;
+            return Pair.create(new 
IndexedTerm(term.getSuffix(clearCompleteBit(index) - term.getPosition()), 
isPartitial), term.getTokens());
         }
 
         public ByteBuffer minTerm()
         {
-            return suffixAt(0).left;
+            return suffixAt(0).left.getBytes();
         }
 
         public ByteBuffer maxTerm()
         {
-            return suffixAt(suffixes.length - 1).left;
+            return suffixAt(suffixes.length - 1).left.getBytes();
         }
 
-        protected Pair<ByteBuffer, TokenTreeBuilder> computeNext()
+        protected Pair<IndexedTerm, TokenTreeBuilder> computeNext()
         {
             while (true)
             {
@@ -106,27 +121,27 @@ public class SuffixSA extends SA<CharBuffer>
                     if (lastProcessedSuffix == null)
                         return endOfData();
 
-                    Pair<ByteBuffer, TokenTreeBuilder> result = finishSuffix();
+                    Pair<IndexedTerm, TokenTreeBuilder> result = 
finishSuffix();
 
                     lastProcessedSuffix = null;
                     return result;
                 }
 
-                Pair<ByteBuffer, TokenTreeBuilder> suffix = 
suffixAt(current++);
+                Pair<IndexedTerm, TokenTreeBuilder> suffix = 
suffixAt(current++);
 
                 if (lastProcessedSuffix == null)
                 {
                     lastProcessedSuffix = suffix.left;
                     container = new DynamicTokenTreeBuilder(suffix.right);
                 }
-                else if (comparator.compare(lastProcessedSuffix, suffix.left) 
== 0)
+                else if (comparator.compare(lastProcessedSuffix.getBytes(), 
suffix.left.getBytes()) == 0)
                 {
                     lastProcessedSuffix = suffix.left;
                     container.add(suffix.right);
                 }
                 else
                 {
-                    Pair<ByteBuffer, TokenTreeBuilder> result = finishSuffix();
+                    Pair<IndexedTerm, TokenTreeBuilder> result = 
finishSuffix();
 
                     lastProcessedSuffix = suffix.left;
                     container = new DynamicTokenTreeBuilder(suffix.right);
@@ -136,7 +151,7 @@ public class SuffixSA extends SA<CharBuffer>
             }
         }
 
-        private Pair<ByteBuffer, TokenTreeBuilder> finishSuffix()
+        private Pair<IndexedTerm, TokenTreeBuilder> finishSuffix()
         {
             return Pair.create(lastProcessedSuffix, container.finish());
         }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2ca2fffe/src/java/org/apache/cassandra/index/sasi/sa/TermIterator.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/sa/TermIterator.java 
b/src/java/org/apache/cassandra/index/sasi/sa/TermIterator.java
index 916aa07..c8572a9 100644
--- a/src/java/org/apache/cassandra/index/sasi/sa/TermIterator.java
+++ b/src/java/org/apache/cassandra/index/sasi/sa/TermIterator.java
@@ -24,7 +24,7 @@ import org.apache.cassandra.utils.Pair;
 
 import com.google.common.collect.AbstractIterator;
 
-public abstract class TermIterator extends AbstractIterator<Pair<ByteBuffer, 
TokenTreeBuilder>>
+public abstract class TermIterator extends AbstractIterator<Pair<IndexedTerm, 
TokenTreeBuilder>>
 {
     public abstract ByteBuffer minTerm();
     public abstract ByteBuffer maxTerm();

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2ca2fffe/src/java/org/apache/cassandra/index/sasi/utils/CombinedTerm.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/index/sasi/utils/CombinedTerm.java 
b/src/java/org/apache/cassandra/index/sasi/utils/CombinedTerm.java
index ba7123a..81e535d 100644
--- a/src/java/org/apache/cassandra/index/sasi/utils/CombinedTerm.java
+++ b/src/java/org/apache/cassandra/index/sasi/utils/CombinedTerm.java
@@ -41,6 +41,11 @@ public class CombinedTerm implements CombinedValue<DataTerm>
         return term.getTerm();
     }
 
+    public boolean isPartial()
+    {
+        return term.isPartial();
+    }
+
     public RangeIterator<Long, Token> getTokenIterator()
     {
         RangeIterator.Builder<Long, Token> union = 
RangeUnionIterator.builder();

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2ca2fffe/src/java/org/apache/cassandra/index/sasi/utils/CombinedTermIterator.java
----------------------------------------------------------------------
diff --git 
a/src/java/org/apache/cassandra/index/sasi/utils/CombinedTermIterator.java 
b/src/java/org/apache/cassandra/index/sasi/utils/CombinedTermIterator.java
index c29de06..4b004e0 100644
--- a/src/java/org/apache/cassandra/index/sasi/utils/CombinedTermIterator.java
+++ b/src/java/org/apache/cassandra/index/sasi/utils/CombinedTermIterator.java
@@ -22,6 +22,7 @@ import java.nio.ByteBuffer;
 import org.apache.cassandra.index.sasi.disk.Descriptor;
 import org.apache.cassandra.index.sasi.disk.OnDiskIndex;
 import org.apache.cassandra.index.sasi.disk.TokenTreeBuilder;
+import org.apache.cassandra.index.sasi.sa.IndexedTerm;
 import org.apache.cassandra.index.sasi.sa.TermIterator;
 import org.apache.cassandra.db.marshal.AbstractType;
 import org.apache.cassandra.utils.Pair;
@@ -72,7 +73,7 @@ public class CombinedTermIterator extends TermIterator
         return max;
     }
 
-    protected Pair<ByteBuffer, TokenTreeBuilder> computeNext()
+    protected Pair<IndexedTerm, TokenTreeBuilder> computeNext()
     {
         if (!union.hasNext())
         {
@@ -81,7 +82,7 @@ public class CombinedTermIterator extends TermIterator
         else
         {
             CombinedTerm term = union.next();
-            return Pair.create(term.getTerm(), term.getTokenTreeBuilder());
+            return Pair.create(new IndexedTerm(term.getTerm(), 
term.isPartial()), term.getTokenTreeBuilder());
         }
 
     }

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2ca2fffe/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java 
b/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java
index b3d641c..c44c48c 100644
--- a/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java
+++ b/test/unit/org/apache/cassandra/index/sasi/SASIIndexTest.java
@@ -1789,30 +1789,43 @@ public class SASIIndexTest
         String containsTable = "sasi_like_contains_test";
         String prefixTable = "sasi_like_prefix_test";
         String analyzedPrefixTable = "sasi_like_analyzed_prefix_test";
+        String tokenizedContainsTable = "sasi_like_analyzed_contains_test";
 
         QueryProcessor.executeOnceInternal(String.format("CREATE TABLE IF NOT 
EXISTS %s.%s (k int primary key, v text);", KS_NAME, containsTable));
         QueryProcessor.executeOnceInternal(String.format("CREATE TABLE IF NOT 
EXISTS %s.%s (k int primary key, v text);", KS_NAME, prefixTable));
         QueryProcessor.executeOnceInternal(String.format("CREATE TABLE IF NOT 
EXISTS %s.%s (k int primary key, v text);", KS_NAME, analyzedPrefixTable));
+        QueryProcessor.executeOnceInternal(String.format("CREATE TABLE IF NOT 
EXISTS %s.%s (k int primary key, v text);", KS_NAME, tokenizedContainsTable));
 
         QueryProcessor.executeOnceInternal(String.format("CREATE CUSTOM INDEX 
IF NOT EXISTS ON %s.%s(v) " +
-                "USING 'org.apache.cassandra.index.sasi.SASIIndex' WITH 
OPTIONS = { 'mode' : 'CONTAINS' };", KS_NAME, containsTable));
+                "USING 'org.apache.cassandra.index.sasi.SASIIndex' WITH 
OPTIONS = { 'mode' : 'CONTAINS', " +
+                                                         "'analyzer_class': 
'org.apache.cassandra.index.sasi.analyzer.NonTokenizingAnalyzer', " +
+                                                         "'case_sensitive': 
'false' };",
+                                                         KS_NAME, 
containsTable));
         QueryProcessor.executeOnceInternal(String.format("CREATE CUSTOM INDEX 
IF NOT EXISTS ON %s.%s(v) " +
                 "USING 'org.apache.cassandra.index.sasi.SASIIndex' WITH 
OPTIONS = { 'mode' : 'PREFIX' };", KS_NAME, prefixTable));
         QueryProcessor.executeOnceInternal(String.format("CREATE CUSTOM INDEX 
IF NOT EXISTS ON %s.%s(v) " +
                 "USING 'org.apache.cassandra.index.sasi.SASIIndex' WITH 
OPTIONS = { 'mode' : 'PREFIX', 'analyzed': 'true' };", KS_NAME, 
analyzedPrefixTable));
-
-        testLIKEAndEQSemanticsWithDifferenceKindsOfIndexes(containsTable, 
prefixTable, analyzedPrefixTable, false);
-        testLIKEAndEQSemanticsWithDifferenceKindsOfIndexes(containsTable, 
prefixTable, analyzedPrefixTable, true);
+        QueryProcessor.executeOnceInternal(String.format("CREATE CUSTOM INDEX 
IF NOT EXISTS ON %s.%s(v) " +
+                                                         "USING 
'org.apache.cassandra.index.sasi.SASIIndex' WITH OPTIONS = " +
+                                                         "{ 'mode' : 
'CONTAINS', 'analyzer_class': 
'org.apache.cassandra.index.sasi.analyzer.StandardAnalyzer'," +
+                                                         "'analyzed': 'true', 
'tokenization_enable_stemming': 'true', 'tokenization_normalize_lowercase': 
'true', " +
+                                                         
"'tokenization_locale': 'en' };",
+                                                         KS_NAME, 
tokenizedContainsTable));
+
+        testLIKEAndEQSemanticsWithDifferenceKindsOfIndexes(containsTable, 
prefixTable, analyzedPrefixTable, tokenizedContainsTable, false);
+        testLIKEAndEQSemanticsWithDifferenceKindsOfIndexes(containsTable, 
prefixTable, analyzedPrefixTable, tokenizedContainsTable, true);
     }
 
     private void testLIKEAndEQSemanticsWithDifferenceKindsOfIndexes(String 
containsTable,
                                                                     String 
prefixTable,
                                                                     String 
analyzedPrefixTable,
+                                                                    String 
tokenizedContainsTable,
                                                                     boolean 
forceFlush)
     {
         QueryProcessor.executeOnceInternal(String.format("INSERT INTO %s.%s 
(k, v) VALUES (?, ?);", KS_NAME, containsTable), 0, "Pavel");
         QueryProcessor.executeOnceInternal(String.format("INSERT INTO %s.%s 
(k, v) VALUES (?, ?);", KS_NAME, prefixTable), 0, "Jean-Claude");
         QueryProcessor.executeOnceInternal(String.format("INSERT INTO %s.%s 
(k, v) VALUES (?, ?);", KS_NAME, analyzedPrefixTable), 0, "Jean-Claude");
+        QueryProcessor.executeOnceInternal(String.format("INSERT INTO %s.%s 
(k, v) VALUES (?, ?);", KS_NAME, tokenizedContainsTable), 0, "Pavel");
 
         if (forceFlush)
         {
@@ -1829,30 +1842,46 @@ public class SASIIndexTest
         Assert.assertNotNull(results);
         Assert.assertEquals(0, results.size());
 
+        results = QueryProcessor.executeOnceInternal(String.format("SELECT * 
FROM %s.%s WHERE v LIKE 'Pav%%';", KS_NAME, containsTable));
+        Assert.assertNotNull(results);
+        Assert.assertEquals(1, results.size());
+
         results = QueryProcessor.executeOnceInternal(String.format("SELECT * 
FROM %s.%s WHERE v LIKE 'Pavel';", KS_NAME, containsTable));
         Assert.assertNotNull(results);
         Assert.assertEquals(1, results.size());
 
+        results = QueryProcessor.executeOnceInternal(String.format("SELECT * 
FROM %s.%s WHERE v = 'Pav';", KS_NAME, containsTable));
+        Assert.assertNotNull(results);
+        Assert.assertEquals(0, results.size());
+
+        results = QueryProcessor.executeOnceInternal(String.format("SELECT * 
FROM %s.%s WHERE v = 'Pavel';", KS_NAME, containsTable));
+        Assert.assertNotNull(results);
+        Assert.assertEquals(1, results.size());
+
         try
         {
-            QueryProcessor.executeOnceInternal(String.format("SELECT * FROM 
%s.%s WHERE v = 'Pav';", KS_NAME, containsTable));
+            QueryProcessor.executeOnceInternal(String.format("SELECT * FROM 
%s.%s WHERE v = 'Pav';", KS_NAME, tokenizedContainsTable));
             Assert.fail();
         }
         catch (InvalidRequestException e)
         {
-            // expected since CONTAINS indexes only support LIKE
+            // expected since CONTAINS + analyzed indexes only support LIKE
         }
 
         try
         {
-            QueryProcessor.executeOnceInternal(String.format("SELECT * FROM 
%s.%s WHERE v LIKE 'Pav%%';", KS_NAME, containsTable));
+            QueryProcessor.executeOnceInternal(String.format("SELECT * FROM 
%s.%s WHERE v LIKE 'Pav%%';", KS_NAME, tokenizedContainsTable));
             Assert.fail();
         }
         catch (InvalidRequestException e)
         {
-            // expected since CONTAINS indexes only support LIKE '%<term>' and 
LIKE '%<term>%'
+            // expected since CONTAINS + analyzed only support LIKE
         }
 
+        results = QueryProcessor.executeOnceInternal(String.format("SELECT * 
FROM %s.%s WHERE v LIKE 'Pav%%';", KS_NAME, containsTable));
+        Assert.assertNotNull(results); // must check this query's result, not the previous one
+        Assert.assertEquals(1, results.size());
+
         results = QueryProcessor.executeOnceInternal(String.format("SELECT * 
FROM %s.%s WHERE v LIKE '%%Pav';", KS_NAME, containsTable));
         Assert.assertNotNull(results);
         Assert.assertEquals(0, results.size());

http://git-wip-us.apache.org/repos/asf/cassandra/blob/2ca2fffe/test/unit/org/apache/cassandra/index/sasi/disk/OnDiskIndexTest.java
----------------------------------------------------------------------
diff --git 
a/test/unit/org/apache/cassandra/index/sasi/disk/OnDiskIndexTest.java 
b/test/unit/org/apache/cassandra/index/sasi/disk/OnDiskIndexTest.java
index 628bd36..bac23ea 100644
--- a/test/unit/org/apache/cassandra/index/sasi/disk/OnDiskIndexTest.java
+++ b/test/unit/org/apache/cassandra/index/sasi/disk/OnDiskIndexTest.java
@@ -65,6 +65,7 @@ public class OnDiskIndexTest
                 put(UTF8Type.instance.decompose("foo"),  keyBuilder(7L));
                 put(UTF8Type.instance.decompose("bar"),  keyBuilder(9L, 10L));
                 put(UTF8Type.instance.decompose("michael"), keyBuilder(11L, 
12L, 1L));
+                put(UTF8Type.instance.decompose("am"), keyBuilder(15L));
         }};
 
         OnDiskIndexBuilder builder = new OnDiskIndexBuilder(UTF8Type.instance, 
UTF8Type.instance, OnDiskIndexBuilder.Mode.CONTAINS);
@@ -100,6 +101,7 @@ public class OnDiskIndexTest
         Assert.assertEquals(convert(7), 
convert(onDisk.search(expressionFor("oo"))));
         Assert.assertEquals(convert(7), 
convert(onDisk.search(expressionFor("o"))));
         Assert.assertEquals(convert(1, 2, 3, 4, 6), 
convert(onDisk.search(expressionFor("t"))));
+        Assert.assertEquals(convert(1, 2, 11, 12), 
convert(onDisk.search(expressionFor("m", Operator.LIKE_PREFIX))));
 
         Assert.assertEquals(Collections.<DecoratedKey>emptySet(), 
convert(onDisk.search(expressionFor("hello"))));
 
@@ -851,7 +853,12 @@ public class OnDiskIndexTest
 
     private static Expression expressionFor(String term)
     {
-        return expressionFor(UTF8Type.instance, 
UTF8Type.instance.decompose(term));
+        return expressionFor(term, Operator.LIKE_CONTAINS);
+    }
+
+    private static Expression expressionFor(String term, Operator op)
+    {
+        return expressionFor(op, UTF8Type.instance, 
UTF8Type.instance.decompose(term));
     }
 
     private static void addAll(OnDiskIndexBuilder builder, ByteBuffer term, 
TokenTreeBuilder tokens)

Reply via email to