Updated Branches: refs/heads/trunk ffd7ee7c3 -> 9297e7b71
Remove row-level bloom filters. patch by Jason Brown; reviewed by Jonathan Ellis for CASSANRDA-4885 Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/9297e7b7 Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/9297e7b7 Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/9297e7b7 Branch: refs/heads/trunk Commit: 9297e7b7196426a6d6c2ad757974ea7fd0866bfa Parents: ffd7ee7 Author: Jason Brown <jasedbr...@gmail.com> Authored: Tue Mar 19 05:57:14 2013 -0700 Committer: Jason Brown <jasedbr...@gmail.com> Committed: Mon Mar 25 05:16:52 2013 -0700 ---------------------------------------------------------------------- src/java/org/apache/cassandra/db/ColumnIndex.java | 20 ++--------- .../org/apache/cassandra/db/RowIndexEntry.java | 27 +++----------- .../db/columniterator/SSTableNamesIterator.java | 27 +++----------- .../db/columniterator/SimpleSliceReader.java | 3 +- .../db/compaction/LazilyCompactedRow.java | 2 +- .../cassandra/db/compaction/PrecompactedRow.java | 2 +- .../apache/cassandra/io/sstable/Descriptor.java | 3 ++ .../apache/cassandra/io/sstable/IndexHelper.java | 28 --------------- .../io/sstable/SSTableIdentityIterator.java | 5 +-- .../apache/cassandra/io/sstable/SSTableWriter.java | 4 +- test/unit/org/apache/cassandra/Util.java | 2 +- test/unit/org/apache/cassandra/db/ScrubTest.java | 3 +- 12 files changed, 29 insertions(+), 97 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/9297e7b7/src/java/org/apache/cassandra/db/ColumnIndex.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/db/ColumnIndex.java b/src/java/org/apache/cassandra/db/ColumnIndex.java index b72638b..e307458 100644 --- a/src/java/org/apache/cassandra/db/ColumnIndex.java +++ b/src/java/org/apache/cassandra/db/ColumnIndex.java @@ -24,26 +24,16 @@ import java.util.*; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.io.sstable.IndexHelper; -import org.apache.cassandra.utils.AlwaysPresentFilter; -import org.apache.cassandra.utils.IFilter; -import org.apache.cassandra.utils.FilterFactory; public class ColumnIndex { public final List<IndexHelper.IndexInfo> columnsIndex; - public final IFilter bloomFilter; - private static final ColumnIndex EMPTY = new ColumnIndex(Collections.<IndexHelper.IndexInfo>emptyList(), new AlwaysPresentFilter()); + private static final ColumnIndex EMPTY = new ColumnIndex(Collections.<IndexHelper.IndexInfo>emptyList()); - private ColumnIndex(int estimatedColumnCount) - { - this(new ArrayList<IndexHelper.IndexInfo>(), FilterFactory.getFilter(estimatedColumnCount, 4, false)); - } - - private ColumnIndex(List<IndexHelper.IndexInfo> columnsIndex, IFilter bloomFilter) + private ColumnIndex(List<IndexHelper.IndexInfo> columnsIndex) { this.columnsIndex = columnsIndex; - this.bloomFilter = bloomFilter; } /** @@ -68,11 +58,10 @@ public class ColumnIndex public Builder(ColumnFamily cf, ByteBuffer key, - int estimatedColumnCount, DataOutput output) { this.indexOffset = rowHeaderSize(key, cf.deletionInfo()); - this.result = new ColumnIndex(estimatedColumnCount); + this.result = new ColumnIndex(new ArrayList<IndexHelper.IndexInfo>()); this.output = output; this.tombstoneTracker = new RangeTombstone.Tracker(cf.getComparator()); } @@ -146,9 +135,6 @@ public class ColumnIndex { atomCount++; - if (column instanceof Column) - result.bloomFilter.add(column.name()); - if (firstColumn == null) { firstColumn = column; http://git-wip-us.apache.org/repos/asf/cassandra/blob/9297e7b7/src/java/org/apache/cassandra/db/RowIndexEntry.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/db/RowIndexEntry.java b/src/java/org/apache/cassandra/db/RowIndexEntry.java index 6bf3e43..9b22b28 100644 --- a/src/java/org/apache/cassandra/db/RowIndexEntry.java +++ b/src/java/org/apache/cassandra/db/RowIndexEntry.java @@ -27,8 +27,6 @@ import java.util.List; import org.apache.cassandra.io.sstable.Descriptor; import org.apache.cassandra.io.sstable.IndexHelper; import org.apache.cassandra.io.util.FileUtils; -import org.apache.cassandra.utils.IFilter; -import org.apache.cassandra.utils.FilterFactory; public class RowIndexEntry { @@ -49,7 +47,7 @@ public class RowIndexEntry public static RowIndexEntry create(long position, DeletionInfo deletionInfo, ColumnIndex index) { if (index != null && index.columnsIndex != null && index.columnsIndex.size() > 1) - return new IndexedEntry(position, deletionInfo, index.columnsIndex, index.bloomFilter); + return new IndexedEntry(position, deletionInfo, index.columnsIndex); else return new RowIndexEntry(position); } @@ -69,11 +67,6 @@ public class RowIndexEntry return Collections.emptyList(); } - public IFilter bloomFilter() - { - throw new UnsupportedOperationException(); - } - public static class Serializer { public void serialize(RowIndexEntry rie, DataOutput dos) throws IOException @@ -86,7 +79,6 @@ public class RowIndexEntry dos.writeInt(rie.columnsIndex().size()); for (IndexHelper.IndexInfo info : rie.columnsIndex()) info.serialize(dos); - FilterFactory.serialize(rie.bloomFilter(), dos); } else { @@ -107,8 +99,10 @@ public class RowIndexEntry List<IndexHelper.IndexInfo> columnsIndex = new ArrayList<IndexHelper.IndexInfo>(entries); for (int i = 0; i < entries; i++) columnsIndex.add(IndexHelper.IndexInfo.deserialize(dis)); - IFilter bf = FilterFactory.deserialize(dis, version.filterType, false); - return new IndexedEntry(position, delInfo, columnsIndex, bf); + + if (version.hasRowLevelBF) + IndexHelper.skipBloomFilter(dis); + return new IndexedEntry(position, delInfo, columnsIndex); } else { @@ -145,16 +139,14 @@ public class RowIndexEntry { private final DeletionInfo deletionInfo; private final List<IndexHelper.IndexInfo> columnsIndex; - private final IFilter bloomFilter; - private IndexedEntry(long position, DeletionInfo deletionInfo, List<IndexHelper.IndexInfo> columnsIndex, IFilter bloomFilter) + private IndexedEntry(long position, DeletionInfo deletionInfo, List<IndexHelper.IndexInfo> columnsIndex) { super(position); assert deletionInfo != null; assert columnsIndex != null && columnsIndex.size() > 1; this.deletionInfo = deletionInfo; this.columnsIndex = columnsIndex; - this.bloomFilter = bloomFilter; } @Override @@ -170,12 +162,6 @@ public class RowIndexEntry } @Override - public IFilter bloomFilter() - { - return bloomFilter; - } - - @Override public int serializedSize() { TypeSizes typeSizes = TypeSizes.NATIVE; @@ -184,7 +170,6 @@ public class RowIndexEntry for (IndexHelper.IndexInfo info : columnsIndex) size += info.serializedSize(typeSizes); - size += FilterFactory.serializedSize(bloomFilter); assert size <= Integer.MAX_VALUE; return (int)size; } http://git-wip-us.apache.org/repos/asf/cassandra/blob/9297e7b7/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java b/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java index 389fbb2..e7257bf 100644 --- a/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java +++ b/src/java/org/apache/cassandra/db/columniterator/SSTableNamesIterator.java @@ -37,7 +37,6 @@ import org.apache.cassandra.io.util.FileDataInput; import org.apache.cassandra.io.util.FileMark; import org.apache.cassandra.io.util.FileUtils; import org.apache.cassandra.utils.ByteBufferUtil; -import org.apache.cassandra.utils.IFilter; public class SSTableNamesIterator extends SimpleAbstractColumnIterator implements ISSTableColumnIterator { @@ -107,7 +106,6 @@ public class SSTableNamesIterator extends SimpleAbstractColumnIterator implement private void read(SSTableReader sstable, FileDataInput file, RowIndexEntry indexEntry) throws IOException { - IFilter bf; List<IndexHelper.IndexInfo> indexList; // If the entry is not indexed or the index is not promoted, read from the row start @@ -127,13 +125,12 @@ public class SSTableNamesIterator extends SimpleAbstractColumnIterator implement if (sstable.descriptor.version.hasPromotedIndexes) { - bf = indexEntry.isIndexed() ? indexEntry.bloomFilter() : null; indexList = indexEntry.columnsIndex(); } else { assert file != null; - bf = IndexHelper.defreezeBloomFilter(file, sstable.descriptor.version.filterType); + IndexHelper.skipBloomFilter(file); indexList = IndexHelper.deserializeIndex(file); } @@ -159,20 +156,9 @@ public class SSTableNamesIterator extends SimpleAbstractColumnIterator implement } List<OnDiskAtom> result = new ArrayList<OnDiskAtom>(); - List<ByteBuffer> filteredColumnNames = new ArrayList<ByteBuffer>(columns.size()); - for (ByteBuffer name : columns) - { - if (bf == null || bf.isPresent(name)) - { - filteredColumnNames.add(name); - } - } - if (filteredColumnNames.isEmpty()) - return; - if (indexList.isEmpty()) { - readSimpleColumns(file, columns, filteredColumnNames, result); + readSimpleColumns(file, columns, result); } else { @@ -187,14 +173,14 @@ public class SSTableNamesIterator extends SimpleAbstractColumnIterator implement file.readInt(); // column count basePosition = file.getFilePointer(); } - readIndexedColumns(sstable.metadata, file, columns, filteredColumnNames, indexList, basePosition, result); + readIndexedColumns(sstable.metadata, file, columns, indexList, basePosition, result); } // create an iterator view of the columns we read iter = result.iterator(); } - private void readSimpleColumns(FileDataInput file, SortedSet<ByteBuffer> columnNames, List<ByteBuffer> filteredColumnNames, List<OnDiskAtom> result) throws IOException + private void readSimpleColumns(FileDataInput file, SortedSet<ByteBuffer> columnNames, List<OnDiskAtom> result) throws IOException { Iterator<OnDiskAtom> atomIterator = cf.metadata().getOnDiskIterator(file, file.readInt(), sstable.descriptor.version); int n = 0; @@ -206,7 +192,7 @@ public class SSTableNamesIterator extends SimpleAbstractColumnIterator implement if (columnNames.contains(column.name())) { result.add(column); - if (n++ > filteredColumnNames.size()) + if (n++ > columnNames.size()) break; } } @@ -220,7 +206,6 @@ public class SSTableNamesIterator extends SimpleAbstractColumnIterator implement private void readIndexedColumns(CFMetaData metadata, FileDataInput file, SortedSet<ByteBuffer> columnNames, - List<ByteBuffer> filteredColumnNames, List<IndexHelper.IndexInfo> indexList, long basePosition, List<OnDiskAtom> result) @@ -230,7 +215,7 @@ public class SSTableNamesIterator extends SimpleAbstractColumnIterator implement AbstractType<?> comparator = metadata.comparator; List<IndexHelper.IndexInfo> ranges = new ArrayList<IndexHelper.IndexInfo>(); int lastIndexIdx = -1; - for (ByteBuffer name : filteredColumnNames) + for (ByteBuffer name : columnNames) { int index = IndexHelper.indexFor(name, indexList, comparator, false, lastIndexIdx); if (index < 0 || index == indexList.size()) http://git-wip-us.apache.org/repos/asf/cassandra/blob/9297e7b7/src/java/org/apache/cassandra/db/columniterator/SimpleSliceReader.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/db/columniterator/SimpleSliceReader.java b/src/java/org/apache/cassandra/db/columniterator/SimpleSliceReader.java index a1a0be4..1231710 100644 --- a/src/java/org/apache/cassandra/db/columniterator/SimpleSliceReader.java +++ b/src/java/org/apache/cassandra/db/columniterator/SimpleSliceReader.java @@ -70,7 +70,8 @@ class SimpleSliceReader extends AbstractIterator<OnDiskAtom> implements OnDiskAt if (!sstable.descriptor.version.hasPromotedIndexes) { - IndexHelper.skipBloomFilter(file); + if(sstable.descriptor.version.hasRowLevelBF) + IndexHelper.skipBloomFilter(file); IndexHelper.skipIndex(file); } http://git-wip-us.apache.org/repos/asf/cassandra/blob/9297e7b7/src/java/org/apache/cassandra/db/compaction/LazilyCompactedRow.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/db/compaction/LazilyCompactedRow.java b/src/java/org/apache/cassandra/db/compaction/LazilyCompactedRow.java index 7418a18..244c897 100644 --- a/src/java/org/apache/cassandra/db/compaction/LazilyCompactedRow.java +++ b/src/java/org/apache/cassandra/db/compaction/LazilyCompactedRow.java @@ -108,7 +108,7 @@ public class LazilyCompactedRow extends AbstractCompactedRow implements Iterable private void indexAndWrite(DataOutput out) throws IOException { - this.indexBuilder = new ColumnIndex.Builder(emptyColumnFamily, key.key, getEstimatedColumnCount(), out); + this.indexBuilder = new ColumnIndex.Builder(emptyColumnFamily, key.key, out); this.columnsIndex = indexBuilder.build(this); } http://git-wip-us.apache.org/repos/asf/cassandra/blob/9297e7b7/src/java/org/apache/cassandra/db/compaction/PrecompactedRow.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/db/compaction/PrecompactedRow.java b/src/java/org/apache/cassandra/db/compaction/PrecompactedRow.java index 213bb8e..f90b8c6 100644 --- a/src/java/org/apache/cassandra/db/compaction/PrecompactedRow.java +++ b/src/java/org/apache/cassandra/db/compaction/PrecompactedRow.java @@ -131,7 +131,7 @@ public class PrecompactedRow extends AbstractCompactedRow { assert compactedCf != null; DataOutputBuffer buffer = new DataOutputBuffer(); - ColumnIndex.Builder builder = new ColumnIndex.Builder(compactedCf, key.key, compactedCf.getColumnCount(), buffer); + ColumnIndex.Builder builder = new ColumnIndex.Builder(compactedCf, key.key, buffer); columnIndex = builder.build(compactedCf); TypeSizes typeSizes = TypeSizes.NATIVE; http://git-wip-us.apache.org/repos/asf/cassandra/blob/9297e7b7/src/java/org/apache/cassandra/io/sstable/Descriptor.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/io/sstable/Descriptor.java b/src/java/org/apache/cassandra/io/sstable/Descriptor.java index 2d1b510..fabfbb8 100644 --- a/src/java/org/apache/cassandra/io/sstable/Descriptor.java +++ b/src/java/org/apache/cassandra/io/sstable/Descriptor.java @@ -72,6 +72,7 @@ public class Descriptor // into this new format) // tracks max local deletiontime in sstable metadata // records bloom_filter_fp_chance in metadata component + // remove row-level BF (CASSANDRA-4885) public static final Version CURRENT = new Version(current_version); @@ -94,6 +95,7 @@ public class Descriptor public final boolean hasSuperColumns; public final boolean tracksMaxLocalDeletionTime; public final boolean hasBloomFilterFPChance; + public final boolean hasRowLevelBF; public Version(String version) { @@ -120,6 +122,7 @@ public class Descriptor filterType = FilterFactory.Type.MURMUR3; hasSuperColumns = version.compareTo("ja") < 0; hasBloomFilterFPChance = version.compareTo("ja") >= 0; + hasRowLevelBF = version.compareTo("ja") < 0; } /** http://git-wip-us.apache.org/repos/asf/cassandra/blob/9297e7b7/src/java/org/apache/cassandra/io/sstable/IndexHelper.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/io/sstable/IndexHelper.java b/src/java/org/apache/cassandra/io/sstable/IndexHelper.java index 36e972e..ef25a4f 100644 --- a/src/java/org/apache/cassandra/io/sstable/IndexHelper.java +++ b/src/java/org/apache/cassandra/io/sstable/IndexHelper.java @@ -105,34 +105,6 @@ public class IndexHelper return indexList; } - public static IFilter defreezeBloomFilter(FileDataInput file, FilterFactory.Type type) throws IOException - { - return defreezeBloomFilter(file, Integer.MAX_VALUE, type); - } - - /** - * De-freeze the bloom filter. - * - * @param file - source file - * @param maxSize - sanity check: if filter claimes to be larger than this it is bogus - * @param type - Bloom Filter type. - * - * @return bloom filter summarizing the column information - * @throws java.io.IOException if an I/O error occurs. - * Guarantees that file's current position will be just after the bloom filter, even if - * the filter cannot be deserialized, UNLESS EOFException is thrown. - */ - public static IFilter defreezeBloomFilter(FileDataInput file, long maxSize, FilterFactory.Type type) throws IOException - { - int size = file.readInt(); - if (size > maxSize || size <= 0) - throw new EOFException("bloom filter claims to be " + size + " bytes, longer than entire row size " + maxSize); - ByteBuffer bytes = file.readBytes(size); - - DataInputStream stream = new DataInputStream(ByteBufferUtil.inputStream(bytes)); - return FilterFactory.deserialize(stream, type, false); - } - /** * The index of the IndexInfo in which a scan starting with @name should begin. * http://git-wip-us.apache.org/repos/asf/cassandra/blob/9297e7b7/src/java/org/apache/cassandra/io/sstable/SSTableIdentityIterator.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/io/sstable/SSTableIdentityIterator.java b/src/java/org/apache/cassandra/io/sstable/SSTableIdentityIterator.java index d2839c8..ceb1df3 100644 --- a/src/java/org/apache/cassandra/io/sstable/SSTableIdentityIterator.java +++ b/src/java/org/apache/cassandra/io/sstable/SSTableIdentityIterator.java @@ -126,7 +126,7 @@ public class SSTableIdentityIterator implements Comparable<SSTableIdentityIterat { try { - IndexHelper.defreezeBloomFilter(file, dataSize, sstable.descriptor.version.filterType); + IndexHelper.skipBloomFilter(file); } catch (Exception e) { @@ -135,10 +135,9 @@ public class SSTableIdentityIterator implements Comparable<SSTableIdentityIterat logger.debug("Invalid bloom filter in {}; will rebuild it", sstable); } - try { - // deFreeze should have left the file position ready to deserialize index + // skipping the old row-level BF should have left the file position ready to deserialize index IndexHelper.deserializeIndex(file); } catch (Exception e) http://git-wip-us.apache.org/repos/asf/cassandra/blob/9297e7b7/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java b/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java index 55871db..6f954dc 100644 --- a/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java +++ b/src/java/org/apache/cassandra/io/sstable/SSTableWriter.java @@ -184,7 +184,7 @@ public class SSTableWriter extends SSTable DataOutputBuffer buffer = new DataOutputBuffer(); // build column index && write columns - ColumnIndex.Builder builder = new ColumnIndex.Builder(cf, decoratedKey.key, cf.getColumnCount(), buffer); + ColumnIndex.Builder builder = new ColumnIndex.Builder(cf, decoratedKey.key, buffer); ColumnIndex index = builder.build(cf); TypeSizes typeSizes = TypeSizes.NATIVE; @@ -245,7 +245,7 @@ public class SSTableWriter extends SSTable ColumnFamily cf = ColumnFamily.create(metadata, ArrayBackedSortedColumns.factory()); cf.delete(deletionInfo); - ColumnIndex.Builder columnIndexer = new ColumnIndex.Builder(cf, key.key, columnCount, dataFile.stream); + ColumnIndex.Builder columnIndexer = new ColumnIndex.Builder(cf, key.key, dataFile.stream); OnDiskAtom.Serializer atomSerializer = Column.onDiskSerializer(); for (int i = 0; i < columnCount; i++) { http://git-wip-us.apache.org/repos/asf/cassandra/blob/9297e7b7/test/unit/org/apache/cassandra/Util.java ---------------------------------------------------------------------- diff --git a/test/unit/org/apache/cassandra/Util.java b/test/unit/org/apache/cassandra/Util.java index 0402918..be5d218 100644 --- a/test/unit/org/apache/cassandra/Util.java +++ b/test/unit/org/apache/cassandra/Util.java @@ -294,7 +294,7 @@ public class Util DataOutputStream dos = new DataOutputStream(baos); DeletionInfo.serializer().serializeForSSTable(cf.deletionInfo(), dos); dos.writeInt(cf.getColumnCount()); - new ColumnIndex.Builder(cf, ByteBufferUtil.EMPTY_BYTE_BUFFER, cf.getColumnCount(), dos).build(cf); + new ColumnIndex.Builder(cf, ByteBufferUtil.EMPTY_BYTE_BUFFER, dos).build(cf); return ByteBuffer.wrap(baos.toByteArray()); } catch (IOException e) http://git-wip-us.apache.org/repos/asf/cassandra/blob/9297e7b7/test/unit/org/apache/cassandra/db/ScrubTest.java ---------------------------------------------------------------------- diff --git a/test/unit/org/apache/cassandra/db/ScrubTest.java b/test/unit/org/apache/cassandra/db/ScrubTest.java index d716ea0..e800bbb 100644 --- a/test/unit/org/apache/cassandra/db/ScrubTest.java +++ b/test/unit/org/apache/cassandra/db/ScrubTest.java @@ -101,8 +101,9 @@ public class ScrubTest extends SchemaLoader rows = cfs.getRangeSlice(Util.range("", ""), 1000, new NamesQueryFilter(CompositeType.build(ByteBufferUtil.bytes("1"))), null); fail("This slice should fail"); } - catch (NegativeArraySizeException e) + catch (IllegalArgumentException e) { + // thrown by Buffer.limit as the column names are attempted to be read (after the row-level BF is skipped) caught = true; } assert caught : "'corrupt' test file actually was not";