[1/2] git commit: Fix sometimes skipping range tombstones during reverse queries

slebresne Mon, 01 Jul 2013 00:38:33 -0700

Updated Branches:
  refs/heads/trunk 09ea74620 -> 4b8897327


Fix sometimes skipping range tombstones during reverse queries

patch by slebresne; reviewed by jbellis for CASSANDRA-5712


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/1a8f7230
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/1a8f7230
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/1a8f7230

Branch: refs/heads/trunk
Commit: 1a8f7230a1b56d8e58c33ef2922f4460e7b6f913
Parents: 8c2a280
Author: Sylvain Lebresne <sylv...@datastax.com>
Authored: Mon Jul 1 09:29:48 2013 +0200
Committer: Sylvain Lebresne <sylv...@datastax.com>
Committed: Mon Jul 1 09:32:21 2013 +0200

----------------------------------------------------------------------
 CHANGES.txt                                     |  3 +-
 .../db/columniterator/IndexedSliceReader.java   | 28 ++++++++++++++-
 .../apache/cassandra/db/RangeTombstoneTest.java | 37 ++++++++++++++++++++
 3 files changed, 66 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/1a8f7230/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 1b9634c..843bb53 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -3,8 +3,9 @@
  * Fix serialization of the LEFT gossip value (CASSANDRA-5696)
  * Pig: support for cql3 tables (CASSANDRA-5234)
  * cqlsh: Don't show 'null' in place of empty values (CASSANDRA-5675)
- * Race condition in detecting version on a mixed 1.1/1.2 cluster 
+ * Race condition in detecting version on a mixed 1.1/1.2 cluster
    (CASSANDRA-5692)
+ * Fix skipping range tombstones with reverse queries (CASSANDRA-5712)
 
 
 1.2.6

http://git-wip-us.apache.org/repos/asf/cassandra/blob/1a8f7230/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
----------------------------------------------------------------------
diff --git 
a/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java 
b/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
index 4ca0ea5..21eb48b 100644
--- a/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
+++ b/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java
@@ -29,6 +29,7 @@ import org.apache.cassandra.db.ColumnFamily;
 import org.apache.cassandra.db.DecoratedKey;
 import org.apache.cassandra.db.DeletionInfo;
 import org.apache.cassandra.db.OnDiskAtom;
+import org.apache.cassandra.db.RangeTombstone;
 import org.apache.cassandra.db.RowIndexEntry;
 import org.apache.cassandra.db.filter.ColumnSlice;
 import org.apache.cassandra.db.marshal.AbstractType;
@@ -60,6 +61,9 @@ class IndexedSliceReader extends AbstractIterator<OnDiskAtom> 
implements OnDiskA
     private final Deque<OnDiskAtom> blockColumns = new 
ArrayDeque<OnDiskAtom>();
     private final AbstractType<?> comparator;
 
+    // Holds range tombstone in reverse queries. See addColumn()
+    private final Deque<OnDiskAtom> rangeTombstonesReversed;
+
     /**
      * This slice reader assumes that slices are sorted correctly, e.g. that 
for forward lookup slices are in
      * lexicographic order of start elements and that for reverse lookup they 
are in reverse lexicographic order of
@@ -74,6 +78,7 @@ class IndexedSliceReader extends AbstractIterator<OnDiskAtom> 
implements OnDiskA
         this.reversed = reversed;
         this.slices = slices;
         this.comparator = sstable.metadata.comparator;
+        this.rangeTombstonesReversed = reversed ? new ArrayDeque<OnDiskAtom>() 
: null;
 
         try
         {
@@ -147,6 +152,14 @@ class IndexedSliceReader extends 
AbstractIterator<OnDiskAtom> implements OnDiskA
     {
         while (true)
         {
+            if (reversed)
+            {
+                // Return all tombstone for the block first (see addColumn() 
below)
+                OnDiskAtom column = rangeTombstonesReversed.poll();
+                if (column != null)
+                    return column;
+            }
+
             OnDiskAtom column = blockColumns.poll();
             if (column == null)
             {
@@ -169,9 +182,22 @@ class IndexedSliceReader extends 
AbstractIterator<OnDiskAtom> implements OnDiskA
     protected void addColumn(OnDiskAtom col)
     {
         if (reversed)
-            blockColumns.addFirst(col);
+        {
+            /*
+             * We put range tomstone markers at the beginning of the range 
they delete. But for reversed queries,
+             * the caller still need to know about a RangeTombstone before it 
sees any column that it covers.
+             * To make that simple, we keep said tombstones separate and 
return them all before any column for
+             * a given block.
+             */
+            if (col instanceof RangeTombstone)
+                rangeTombstonesReversed.addFirst(col);
+            else
+                blockColumns.addFirst(col);
+        }
         else
+        {
             blockColumns.addLast(col);
+        }
     }
 
     private abstract class BlockFetcher

http://git-wip-us.apache.org/repos/asf/cassandra/blob/1a8f7230/test/unit/org/apache/cassandra/db/RangeTombstoneTest.java
----------------------------------------------------------------------
diff --git a/test/unit/org/apache/cassandra/db/RangeTombstoneTest.java 
b/test/unit/org/apache/cassandra/db/RangeTombstoneTest.java
index c531461..c2f8b83 100644
--- a/test/unit/org/apache/cassandra/db/RangeTombstoneTest.java
+++ b/test/unit/org/apache/cassandra/db/RangeTombstoneTest.java
@@ -154,6 +154,38 @@ public class RangeTombstoneTest extends SchemaLoader
             assert !isLive(cf, cf.getColumn(b(i))) : "Column " + i + " 
shouldn't be live";
     }
 
+    @Test
+    public void reverseQueryTest() throws Exception
+    {
+        Table table = Table.open(KSNAME);
+        ColumnFamilyStore cfs = table.getColumnFamilyStore(CFNAME);
+
+        // Inserting data
+        String key = "k3";
+        RowMutation rm;
+        ColumnFamily cf;
+
+        rm = new RowMutation(KSNAME, ByteBufferUtil.bytes(key));
+        add(rm, 2, 0);
+        rm.apply();
+        cfs.forceBlockingFlush();
+
+        rm = new RowMutation(KSNAME, ByteBufferUtil.bytes(key));
+        // Deletes everything but without being a row tombstone
+        delete(rm.addOrGet(CFNAME), 0, 10, 1);
+        add(rm, 1, 2);
+        rm.apply();
+        cfs.forceBlockingFlush();
+
+        // Get the last value of the row
+        QueryPath path = new QueryPath(CFNAME);
+        cf = cfs.getColumnFamily(QueryFilter.getSliceFilter(dk(key), path, 
ByteBufferUtil.EMPTY_BYTE_BUFFER, ByteBufferUtil.EMPTY_BYTE_BUFFER, true, 1));
+
+        assert !cf.isEmpty();
+        int last = i(cf.getSortedColumns().iterator().next().name());
+        assert last == 1 : "Last column should be column 1 since column 2 has 
been deleted";
+    }
+
     private static boolean isLive(ColumnFamily cf, IColumn c)
     {
         return c != null && !c.isMarkedForDelete() && 
!cf.deletionInfo().isDeleted(c);
@@ -164,6 +196,11 @@ public class RangeTombstoneTest extends SchemaLoader
         return ByteBufferUtil.bytes(i);
     }
 
+    private static int i(ByteBuffer i)
+    {
+        return ByteBufferUtil.toInt(i);
+    }
+
     private static void add(RowMutation rm, int value, long timestamp)
     {
         rm.add(new QueryPath(CFNAME, null, b(value)), b(value), timestamp);

[1/2] git commit: Fix sometimes skipping range tombstones during reverse queries

Reply via email to