Fix assertion error when reading static on an indexed sstable patch by slebresne; reviewed by blambov for CASSANDRA-10903
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/8bc567b3 Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/8bc567b3 Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/8bc567b3 Branch: refs/heads/trunk Commit: 8bc567b3ca40199f6d4a7b6f32971f2fea56a6b9 Parents: 796db6e Author: Sylvain Lebresne <sylv...@datastax.com> Authored: Mon Dec 21 12:14:46 2015 +0100 Committer: Sylvain Lebresne <sylv...@datastax.com> Committed: Wed Dec 23 15:15:14 2015 +0100 ---------------------------------------------------------------------- CHANGES.txt | 1 + .../columniterator/AbstractSSTableIterator.java | 37 ++++----- .../db/columniterator/SSTableIterator.java | 38 +++------ .../columniterator/SSTableReversedIterator.java | 28 ++----- .../cql3/QueryWithIndexedSSTableTest.java | 84 ++++++++++++++++++++ 5 files changed, 121 insertions(+), 67 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/8bc567b3/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index 7794c96..a669b17 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 3.0.3 + * Fix potential assertion error when reading static columns (CASSANDRA-0903) * Avoid NoSuchElementException when executing empty batch (CASSANDRA-10711) * Avoid building PartitionUpdate in toString (CASSANDRA-10897) * Reduce heap spent when receiving many SSTables (CASSANDRA-10797) http://git-wip-us.apache.org/repos/asf/cassandra/blob/8bc567b3/src/java/org/apache/cassandra/db/columniterator/AbstractSSTableIterator.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/db/columniterator/AbstractSSTableIterator.java b/src/java/org/apache/cassandra/db/columniterator/AbstractSSTableIterator.java index 5f280d7..8ac3dcb 100644 --- a/src/java/org/apache/cassandra/db/columniterator/AbstractSSTableIterator.java +++ b/src/java/org/apache/cassandra/db/columniterator/AbstractSSTableIterator.java @@ -99,14 +99,14 @@ abstract class AbstractSSTableIterator implements SliceableUnfilteredRowIterator // Note that this needs to be called after file != null and after the partitionDeletion has been set, but before readStaticRow // (since it uses it) so we can't move that up (but we'll be able to simplify as soon as we drop support for the old file format). - this.reader = needsReader ? createReader(indexEntry, file, true, shouldCloseFile) : null; + this.reader = needsReader ? createReader(indexEntry, file, shouldCloseFile) : null; this.staticRow = readStaticRow(sstable, file, helper, columns.fetchedColumns().statics, isForThrift, reader == null ? null : reader.deserializer); } else { this.partitionLevelDeletion = indexEntry.deletionTime(); this.staticRow = Rows.EMPTY_STATIC_ROW; - this.reader = needsReader ? createReader(indexEntry, file, false, shouldCloseFile) : null; + this.reader = needsReader ? createReader(indexEntry, file, shouldCloseFile) : null; } if (reader == null && file != null && shouldCloseFile) @@ -180,7 +180,7 @@ abstract class AbstractSSTableIterator implements SliceableUnfilteredRowIterator } } - protected abstract Reader createReader(RowIndexEntry indexEntry, FileDataInput file, boolean isAtPartitionStart, boolean shouldCloseFile); + protected abstract Reader createReader(RowIndexEntry indexEntry, FileDataInput file, boolean shouldCloseFile); public CFMetaData metadata() { @@ -291,19 +291,13 @@ abstract class AbstractSSTableIterator implements SliceableUnfilteredRowIterator // Records the currently open range tombstone (if any) protected DeletionTime openMarker = null; - // !isInit means we have never seeked in the file and thus should seek before reading anything - protected boolean isInit; - - protected Reader(FileDataInput file, boolean isInit, boolean shouldCloseFile) + protected Reader(FileDataInput file, boolean shouldCloseFile) { this.file = file; - this.isInit = isInit; this.shouldCloseFile = shouldCloseFile; if (file != null) createDeserializer(); - else - assert !isInit; } private void createDeserializer() @@ -343,12 +337,6 @@ abstract class AbstractSSTableIterator implements SliceableUnfilteredRowIterator { try { - if (!isInit) - { - init(); - isInit = true; - } - return hasNextInternal(); } catch (IOException e) @@ -387,9 +375,6 @@ abstract class AbstractSSTableIterator implements SliceableUnfilteredRowIterator } } - // Called is hasNext() is called but we haven't been yet initialized - protected abstract void init() throws IOException; - // Set the reader so its hasNext/next methods return values within the provided slice public abstract void setForSlice(Slice slice) throws IOException; @@ -458,9 +443,21 @@ abstract class AbstractSSTableIterator implements SliceableUnfilteredRowIterator } // Update the block idx based on the current reader position if we're past the current block. + // This only makes sense for forward iteration (for reverse ones, when we reach the end of a block we + // should seek to the previous one, not update the index state and continue). public void updateBlock() throws IOException { - assert currentIndexIdx >= 0; + assert !reversed; + + // If we get here with currentBlockIdx < 0, it means setToBlock() has never been called, so it means + // we're about to read from the beginning of the partition, but haven't "prepared" the IndexState yet. + // Do so by setting us on the first block. + if (currentIndexIdx < 0) + { + setToBlock(0); + return; + } + while (currentIndexIdx + 1 < indexes.size() && isPastCurrentBlock()) { reader.openMarker = currentIndex().endOpenMarker; http://git-wip-us.apache.org/repos/asf/cassandra/blob/8bc567b3/src/java/org/apache/cassandra/db/columniterator/SSTableIterator.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/db/columniterator/SSTableIterator.java b/src/java/org/apache/cassandra/db/columniterator/SSTableIterator.java index 3536d65..0409310 100644 --- a/src/java/org/apache/cassandra/db/columniterator/SSTableIterator.java +++ b/src/java/org/apache/cassandra/db/columniterator/SSTableIterator.java @@ -46,11 +46,11 @@ public class SSTableIterator extends AbstractSSTableIterator super(sstable, file, key, indexEntry, columns, isForThrift); } - protected Reader createReader(RowIndexEntry indexEntry, FileDataInput file, boolean isAtPartitionStart, boolean shouldCloseFile) + protected Reader createReader(RowIndexEntry indexEntry, FileDataInput file, boolean shouldCloseFile) { return indexEntry.isIndexed() - ? new ForwardIndexedReader(indexEntry, file, isAtPartitionStart, shouldCloseFile) - : new ForwardReader(file, isAtPartitionStart, shouldCloseFile); + ? new ForwardIndexedReader(indexEntry, file, shouldCloseFile) + : new ForwardReader(file, shouldCloseFile); } public boolean isReverseOrder() @@ -70,16 +70,9 @@ public class SSTableIterator extends AbstractSSTableIterator protected boolean sliceDone; // set to true once we know we have no more result for the slice. This is in particular // used by the indexed reader when we know we can't have results based on the index. - private ForwardReader(FileDataInput file, boolean isAtPartitionStart, boolean shouldCloseFile) + private ForwardReader(FileDataInput file, boolean shouldCloseFile) { - super(file, isAtPartitionStart, shouldCloseFile); - } - - protected void init() throws IOException - { - // We should always have been initialized (at the beginning of the partition). Only indexed readers may - // have to initialize. - throw new IllegalStateException(); + super(file, shouldCloseFile); } public void setForSlice(Slice slice) throws IOException @@ -95,6 +88,8 @@ public class SSTableIterator extends AbstractSSTableIterator // Return what should be returned at the end of this, or null if nothing should. private Unfiltered handlePreSliceData() throws IOException { + assert deserializer != null; + // Note that the following comparison is not strict. The reason is that the only cases // where it can be == is if the "next" is a RT start marker (either a '[' of a ')[' boundary), // and if we had a strict inequality and an open RT marker before this, we would issue @@ -126,6 +121,8 @@ public class SSTableIterator extends AbstractSSTableIterator // if we're done with the slice. protected Unfiltered computeNext() throws IOException { + assert deserializer != null; + if (!deserializer.hasNext() || deserializer.compareNextTo(end) > 0) return null; @@ -143,8 +140,6 @@ public class SSTableIterator extends AbstractSSTableIterator if (sliceDone) return false; - assert deserializer != null; - if (start != null) { Unfiltered unfiltered = handlePreSliceData(); @@ -187,28 +182,18 @@ public class SSTableIterator extends AbstractSSTableIterator private int lastBlockIdx; // the last index block that has data for the current query - private ForwardIndexedReader(RowIndexEntry indexEntry, FileDataInput file, boolean isAtPartitionStart, boolean shouldCloseFile) + private ForwardIndexedReader(RowIndexEntry indexEntry, FileDataInput file, boolean shouldCloseFile) { - super(file, isAtPartitionStart, shouldCloseFile); + super(file, shouldCloseFile); this.indexState = new IndexState(this, sstable.metadata.comparator, indexEntry, false); this.lastBlockIdx = indexState.blocksCount(); // if we never call setForSlice, that's where we want to stop } @Override - protected void init() throws IOException - { - // If this is called, it means we're calling hasNext() before any call to setForSlice. Which means - // we're reading everything from the beginning. So just set us up at the beginning of the first block. - indexState.setToBlock(0); - } - - @Override public void setForSlice(Slice slice) throws IOException { super.setForSlice(slice); - isInit = true; - // if our previous slicing already got us the biggest row in the sstable, we're done if (indexState.isDone()) { @@ -265,6 +250,7 @@ public class SSTableIterator extends AbstractSSTableIterator protected Unfiltered computeNext() throws IOException { // Our previous read might have made us cross an index block boundary. If so, update our informations. + // If we read from the beginning of the partition, this is also what will initialize the index state. indexState.updateBlock(); // Return the next unfiltered unless we've reached the end, or we're beyond our slice http://git-wip-us.apache.org/repos/asf/cassandra/blob/8bc567b3/src/java/org/apache/cassandra/db/columniterator/SSTableReversedIterator.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/db/columniterator/SSTableReversedIterator.java b/src/java/org/apache/cassandra/db/columniterator/SSTableReversedIterator.java index 66c32ee..14cec36 100644 --- a/src/java/org/apache/cassandra/db/columniterator/SSTableReversedIterator.java +++ b/src/java/org/apache/cassandra/db/columniterator/SSTableReversedIterator.java @@ -49,11 +49,11 @@ public class SSTableReversedIterator extends AbstractSSTableIterator super(sstable, file, key, indexEntry, columns, isForThrift); } - protected Reader createReader(RowIndexEntry indexEntry, FileDataInput file, boolean isAtPartitionStart, boolean shouldCloseFile) + protected Reader createReader(RowIndexEntry indexEntry, FileDataInput file, boolean shouldCloseFile) { return indexEntry.isIndexed() - ? new ReverseIndexedReader(indexEntry, file, isAtPartitionStart, shouldCloseFile) - : new ReverseReader(file, isAtPartitionStart, shouldCloseFile); + ? new ReverseIndexedReader(indexEntry, file, shouldCloseFile) + : new ReverseReader(file, shouldCloseFile); } public boolean isReverseOrder() @@ -66,9 +66,9 @@ public class SSTableReversedIterator extends AbstractSSTableIterator protected ReusablePartitionData buffer; protected Iterator<Unfiltered> iterator; - private ReverseReader(FileDataInput file, boolean isAtPartitionStart, boolean shouldCloseFile) + private ReverseReader(FileDataInput file, boolean shouldCloseFile) { - super(file, isAtPartitionStart, shouldCloseFile); + super(file, shouldCloseFile); } protected ReusablePartitionData createBuffer(int blocksCount) @@ -100,13 +100,6 @@ public class SSTableReversedIterator extends AbstractSSTableIterator return new ReusablePartitionData(metadata(), partitionKey(), columns(), estimatedRowCount); } - protected void init() throws IOException - { - // We should always have been initialized (at the beginning of the partition). Only indexed readers may - // have to initialize. - throw new IllegalStateException(); - } - public void setForSlice(Slice slice) throws IOException { // If we have read the data, just create the iterator for the slice. Otherwise, read the data. @@ -212,23 +205,16 @@ public class SSTableReversedIterator extends AbstractSSTableIterator // The last index block to consider for the slice private int lastBlockIdx; - private ReverseIndexedReader(RowIndexEntry indexEntry, FileDataInput file, boolean isAtPartitionStart, boolean shouldCloseFile) + private ReverseIndexedReader(RowIndexEntry indexEntry, FileDataInput file, boolean shouldCloseFile) { - super(file, isAtPartitionStart, shouldCloseFile); + super(file, shouldCloseFile); this.indexState = new IndexState(this, sstable.metadata.comparator, indexEntry, true); } - protected void init() throws IOException - { - // This is actually a no-op, because if we call hasNext without having called setForSlice, then ReverseReader.hasNextInternal - // will call setForSlice(Slice.ALL) which does the right thing. - } - @Override public void setForSlice(Slice slice) throws IOException { this.slice = slice; - isInit = true; // if our previous slicing already got us past the beginning of the sstable, we're done if (indexState.isDone()) http://git-wip-us.apache.org/repos/asf/cassandra/blob/8bc567b3/test/unit/org/apache/cassandra/cql3/QueryWithIndexedSSTableTest.java ---------------------------------------------------------------------- diff --git a/test/unit/org/apache/cassandra/cql3/QueryWithIndexedSSTableTest.java b/test/unit/org/apache/cassandra/cql3/QueryWithIndexedSSTableTest.java new file mode 100644 index 0000000..4838392 --- /dev/null +++ b/test/unit/org/apache/cassandra/cql3/QueryWithIndexedSSTableTest.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.cassandra.cql3; + +import java.util.Random; + +import org.junit.Test; + +import org.apache.cassandra.Util; +import org.apache.cassandra.db.DecoratedKey; +import org.apache.cassandra.db.RowIndexEntry; +import org.apache.cassandra.io.sstable.format.SSTableReader; +import org.apache.cassandra.utils.ByteBufferUtil; + +public class QueryWithIndexedSSTableTest extends CQLTester +{ + @Test + public void queryIndexedSSTableTest() throws Throwable + { + // That test reproduces the bug from CASSANDRA-10903 and the fact we have a static column is + // relevant to that reproduction in particular as it forces a slightly different code path that + // if there wasn't a static. + + int ROWS = 1000; + int VALUE_LENGTH = 100; + + createTable("CREATE TABLE %s (k int, t int, s text static, v text, PRIMARY KEY (k, t))"); + + // We create a partition that is big enough that the underlying sstable will be indexed + // For that, we use a large-ish number of row, and a value that isn't too small. + String text = makeRandomTest(VALUE_LENGTH); + for (int i = 0; i < ROWS; i++) + execute("INSERT INTO %s(k, t, v) VALUES (?, ?, ?)", 0, i, text + i); + + flush(); + compact(); + + // Sanity check that we're testing what we want to test, that is that we're reading from an indexed + // sstable. Note that we'll almost surely have a single indexed sstable in practice, but it's theorically + // possible for a compact strategy to yield more than that and as long as one is indexed we're pretty + // much testing what we want. If this check ever fails on some specific setting, we'll have to either + // tweak ROWS and VALUE_LENGTH, or skip the test on those settings. + DecoratedKey dk = Util.dk(ByteBufferUtil.bytes(0)); + boolean hasIndexed = false; + for (SSTableReader sstable : getCurrentColumnFamilyStore().getLiveSSTables()) + { + RowIndexEntry indexEntry = sstable.getPosition(dk, SSTableReader.Operator.EQ); + hasIndexed |= indexEntry != null && indexEntry.isIndexed(); + } + assert hasIndexed; + + assertRowCount(execute("SELECT s FROM %s WHERE k = ?", 0), ROWS); + assertRowCount(execute("SELECT s FROM %s WHERE k = ? ORDER BY t DESC", 0), ROWS); + + assertRowCount(execute("SELECT DISTINCT s FROM %s WHERE k = ?", 0), 1); + assertRowCount(execute("SELECT DISTINCT s FROM %s WHERE k = ? ORDER BY t DESC", 0), 1); + } + + // Creates a random string + public static String makeRandomTest(int length) + { + Random random = new Random(); + char[] chars = new char[26]; + int i = 0; + for (char c = 'a'; c <= 'z'; c++) + chars[i++] = c; + return new String(chars); + } +}