Updated Branches: refs/heads/cassandra-1.2 8c2a28050 -> 1a8f7230a
Fix sometimes skipping range tombstones during reverse queries patch by slebresne; reviewed by jbellis for CASSANDRA-5712 Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/1a8f7230 Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/1a8f7230 Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/1a8f7230 Branch: refs/heads/cassandra-1.2 Commit: 1a8f7230a1b56d8e58c33ef2922f4460e7b6f913 Parents: 8c2a280 Author: Sylvain Lebresne <sylv...@datastax.com> Authored: Mon Jul 1 09:29:48 2013 +0200 Committer: Sylvain Lebresne <sylv...@datastax.com> Committed: Mon Jul 1 09:32:21 2013 +0200 ---------------------------------------------------------------------- CHANGES.txt | 3 +- .../db/columniterator/IndexedSliceReader.java | 28 ++++++++++++++- .../apache/cassandra/db/RangeTombstoneTest.java | 37 ++++++++++++++++++++ 3 files changed, 66 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/1a8f7230/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index 1b9634c..843bb53 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -3,8 +3,9 @@ * Fix serialization of the LEFT gossip value (CASSANDRA-5696) * Pig: support for cql3 tables (CASSANDRA-5234) * cqlsh: Don't show 'null' in place of empty values (CASSANDRA-5675) - * Race condition in detecting version on a mixed 1.1/1.2 cluster + * Race condition in detecting version on a mixed 1.1/1.2 cluster (CASSANDRA-5692) + * Fix skipping range tombstones with reverse queries (CASSANDRA-5712) 1.2.6 http://git-wip-us.apache.org/repos/asf/cassandra/blob/1a8f7230/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java b/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java index 4ca0ea5..21eb48b 100644 --- a/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java +++ b/src/java/org/apache/cassandra/db/columniterator/IndexedSliceReader.java @@ -29,6 +29,7 @@ import org.apache.cassandra.db.ColumnFamily; import org.apache.cassandra.db.DecoratedKey; import org.apache.cassandra.db.DeletionInfo; import org.apache.cassandra.db.OnDiskAtom; +import org.apache.cassandra.db.RangeTombstone; import org.apache.cassandra.db.RowIndexEntry; import org.apache.cassandra.db.filter.ColumnSlice; import org.apache.cassandra.db.marshal.AbstractType; @@ -60,6 +61,9 @@ class IndexedSliceReader extends AbstractIterator<OnDiskAtom> implements OnDiskA private final Deque<OnDiskAtom> blockColumns = new ArrayDeque<OnDiskAtom>(); private final AbstractType<?> comparator; + // Holds range tombstone in reverse queries. See addColumn() + private final Deque<OnDiskAtom> rangeTombstonesReversed; + /** * This slice reader assumes that slices are sorted correctly, e.g. that for forward lookup slices are in * lexicographic order of start elements and that for reverse lookup they are in reverse lexicographic order of @@ -74,6 +78,7 @@ class IndexedSliceReader extends AbstractIterator<OnDiskAtom> implements OnDiskA this.reversed = reversed; this.slices = slices; this.comparator = sstable.metadata.comparator; + this.rangeTombstonesReversed = reversed ? new ArrayDeque<OnDiskAtom>() : null; try { @@ -147,6 +152,14 @@ class IndexedSliceReader extends AbstractIterator<OnDiskAtom> implements OnDiskA { while (true) { + if (reversed) + { + // Return all tombstone for the block first (see addColumn() below) + OnDiskAtom column = rangeTombstonesReversed.poll(); + if (column != null) + return column; + } + OnDiskAtom column = blockColumns.poll(); if (column == null) { @@ -169,9 +182,22 @@ class IndexedSliceReader extends AbstractIterator<OnDiskAtom> implements OnDiskA protected void addColumn(OnDiskAtom col) { if (reversed) - blockColumns.addFirst(col); + { + /* + * We put range tomstone markers at the beginning of the range they delete. But for reversed queries, + * the caller still need to know about a RangeTombstone before it sees any column that it covers. + * To make that simple, we keep said tombstones separate and return them all before any column for + * a given block. + */ + if (col instanceof RangeTombstone) + rangeTombstonesReversed.addFirst(col); + else + blockColumns.addFirst(col); + } else + { blockColumns.addLast(col); + } } private abstract class BlockFetcher http://git-wip-us.apache.org/repos/asf/cassandra/blob/1a8f7230/test/unit/org/apache/cassandra/db/RangeTombstoneTest.java ---------------------------------------------------------------------- diff --git a/test/unit/org/apache/cassandra/db/RangeTombstoneTest.java b/test/unit/org/apache/cassandra/db/RangeTombstoneTest.java index c531461..c2f8b83 100644 --- a/test/unit/org/apache/cassandra/db/RangeTombstoneTest.java +++ b/test/unit/org/apache/cassandra/db/RangeTombstoneTest.java @@ -154,6 +154,38 @@ public class RangeTombstoneTest extends SchemaLoader assert !isLive(cf, cf.getColumn(b(i))) : "Column " + i + " shouldn't be live"; } + @Test + public void reverseQueryTest() throws Exception + { + Table table = Table.open(KSNAME); + ColumnFamilyStore cfs = table.getColumnFamilyStore(CFNAME); + + // Inserting data + String key = "k3"; + RowMutation rm; + ColumnFamily cf; + + rm = new RowMutation(KSNAME, ByteBufferUtil.bytes(key)); + add(rm, 2, 0); + rm.apply(); + cfs.forceBlockingFlush(); + + rm = new RowMutation(KSNAME, ByteBufferUtil.bytes(key)); + // Deletes everything but without being a row tombstone + delete(rm.addOrGet(CFNAME), 0, 10, 1); + add(rm, 1, 2); + rm.apply(); + cfs.forceBlockingFlush(); + + // Get the last value of the row + QueryPath path = new QueryPath(CFNAME); + cf = cfs.getColumnFamily(QueryFilter.getSliceFilter(dk(key), path, ByteBufferUtil.EMPTY_BYTE_BUFFER, ByteBufferUtil.EMPTY_BYTE_BUFFER, true, 1)); + + assert !cf.isEmpty(); + int last = i(cf.getSortedColumns().iterator().next().name()); + assert last == 1 : "Last column should be column 1 since column 2 has been deleted"; + } + private static boolean isLive(ColumnFamily cf, IColumn c) { return c != null && !c.isMarkedForDelete() && !cf.deletionInfo().isDeleted(c); @@ -164,6 +196,11 @@ public class RangeTombstoneTest extends SchemaLoader return ByteBufferUtil.bytes(i); } + private static int i(ByteBuffer i) + { + return ByteBufferUtil.toInt(i); + } + private static void add(RowMutation rm, int value, long timestamp) { rm.add(new QueryPath(CFNAME, null, b(value)), b(value), timestamp);