Updated Branches: refs/heads/trunk 98a70bdeb -> 772c4f1a7
Estimate how many keys overlap with other sstables before compacting a single sstable because of high tombstone counts patch by yukim; reviewed by jbellis for CASSANDRA-4022 Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/772c4f1a Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/772c4f1a Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/772c4f1a Branch: refs/heads/trunk Commit: 772c4f1a7939e1bf236c5bf3e7bf624f7078c80b Parents: 98a70bd Author: Jonathan Ellis <jbel...@apache.org> Authored: Fri Mar 23 11:43:32 2012 -0500 Committer: Jonathan Ellis <jbel...@apache.org> Committed: Fri Mar 30 23:46:05 2012 -0500 ---------------------------------------------------------------------- .../compaction/SizeTieredCompactionStrategy.java | 33 +++++++++++++- .../apache/cassandra/utils/EstimatedHistogram.java | 25 +++++++++++ .../cassandra/db/compaction/CompactionsTest.java | 9 +--- .../cassandra/utils/EstimatedHistogramTest.java | 17 ++++++++ 4 files changed, 74 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/772c4f1a/src/java/org/apache/cassandra/db/compaction/SizeTieredCompactionStrategy.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/db/compaction/SizeTieredCompactionStrategy.java b/src/java/org/apache/cassandra/db/compaction/SizeTieredCompactionStrategy.java index d6f4025..0eabd08 100644 --- a/src/java/org/apache/cassandra/db/compaction/SizeTieredCompactionStrategy.java +++ b/src/java/org/apache/cassandra/db/compaction/SizeTieredCompactionStrategy.java @@ -24,6 +24,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.cassandra.db.ColumnFamilyStore; +import org.apache.cassandra.dht.Range; +import org.apache.cassandra.dht.Token; import org.apache.cassandra.io.sstable.SSTable; import org.apache.cassandra.io.sstable.SSTableReader; import org.apache.cassandra.utils.Pair; @@ -87,8 +89,34 @@ public class SizeTieredCompactionStrategy extends AbstractCompactionStrategy { for (SSTableReader table : bucket) { - if (table.getEstimatedDroppableTombstoneRatio(gcBefore) > tombstoneThreshold) + double droppableRatio = table.getEstimatedDroppableTombstoneRatio(gcBefore); + if (droppableRatio <= tombstoneThreshold) + continue; + + Set<SSTableReader> overlaps = cfs.getOverlappingSSTables(Collections.singleton(table)); + if (overlaps.isEmpty()) + { + // there is no overlap, tombstones are safely droppable prunedBuckets.add(Collections.singletonList(table)); + } + else + { + // what percentage of columns do we expect to compact outside of overlap? + // first, calculate estimated keys that do not overlap + long keys = table.estimatedKeys(); + Set<Range<Token>> ranges = new HashSet<Range<Token>>(); + for (SSTableReader overlap : overlaps) + ranges.add(new Range<Token>(overlap.first.token, overlap.last.token)); + long remainingKeys = keys - table.estimatedKeysForRanges(ranges); + // next, calculate what percentage of columns we have within those keys + double remainingKeysRatio = ((double) remainingKeys) / keys; + long columns = table.getEstimatedColumnCount().percentile(remainingKeysRatio) * remainingKeys; + double remainingColumnsRatio = ((double) columns) / (table.getEstimatedColumnCount().count() * table.getEstimatedColumnCount().mean()); + + // if we still expect to have droppable tombstones in rest of columns, then try compacting it + if (remainingColumnsRatio * droppableRatio > tombstoneThreshold) + prunedBuckets.add(Collections.singletonList(table)); + } } } @@ -127,8 +155,7 @@ public class SizeTieredCompactionStrategy extends AbstractCompactionStrategy public AbstractCompactionTask getUserDefinedTask(Collection<SSTableReader> sstables, final int gcBefore) { - return new CompactionTask(cfs, sstables, gcBefore) - .isUserDefined(true); + return new CompactionTask(cfs, sstables, gcBefore).isUserDefined(true); } public int getEstimatedRemainingTasks() http://git-wip-us.apache.org/repos/asf/cassandra/blob/772c4f1a/src/java/org/apache/cassandra/utils/EstimatedHistogram.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/utils/EstimatedHistogram.java b/src/java/org/apache/cassandra/utils/EstimatedHistogram.java index 58d9ecc..7a184d8 100644 --- a/src/java/org/apache/cassandra/utils/EstimatedHistogram.java +++ b/src/java/org/apache/cassandra/utils/EstimatedHistogram.java @@ -161,6 +161,31 @@ public class EstimatedHistogram } /** + * @param percentile + * @return estimated value at given percentile + */ + public long percentile(double percentile) + { + assert percentile >= 0 && percentile <= 1.0; + int lastBucket = buckets.length() - 1; + if (buckets.get(lastBucket) > 0) + throw new IllegalStateException("Unable to compute when histogram overflowed"); + + long pcount = (long) Math.floor(count() * percentile); + if (pcount == 0) + return 0; + + long elements = 0; + for (int i = 0; i < lastBucket; i++) + { + elements += buckets.get(i); + if (elements >= pcount) + return bucketOffsets[i]; + } + return 0; + } + + /** * @return the mean histogram value (average of bucket offsets, weighted by count) * @throws IllegalStateException if any values were greater than the largest bucket threshold */ http://git-wip-us.apache.org/repos/asf/cassandra/blob/772c4f1a/test/unit/org/apache/cassandra/db/compaction/CompactionsTest.java ---------------------------------------------------------------------- diff --git a/test/unit/org/apache/cassandra/db/compaction/CompactionsTest.java b/test/unit/org/apache/cassandra/db/compaction/CompactionsTest.java index e8e01a2..1476b4a 100644 --- a/test/unit/org/apache/cassandra/db/compaction/CompactionsTest.java +++ b/test/unit/org/apache/cassandra/db/compaction/CompactionsTest.java @@ -108,14 +108,9 @@ public class CompactionsTest extends SchemaLoader Table table = Table.open(TABLE1); ColumnFamilyStore store = table.getColumnFamilyStore("Standard1"); store.clearUnsafe(); - store.metadata.gcGraceSeconds(5); - - // update SizeTieredCompactionStrategy's min_sstable_size to something small - // to split bucket for ttl'd sstable from others - Map<String, String> opts = new HashMap<String, String>(); - opts.put(SizeTieredCompactionStrategy.MIN_SSTABLE_SIZE_KEY, "512"); - store.metadata.compactionStrategyOptions(opts); + store.metadata.gcGraceSeconds(1); store.setCompactionStrategyClass(SizeTieredCompactionStrategy.class.getCanonicalName()); + // disable compaction while flushing store.disableAutoCompaction(); http://git-wip-us.apache.org/repos/asf/cassandra/blob/772c4f1a/test/unit/org/apache/cassandra/utils/EstimatedHistogramTest.java ---------------------------------------------------------------------- diff --git a/test/unit/org/apache/cassandra/utils/EstimatedHistogramTest.java b/test/unit/org/apache/cassandra/utils/EstimatedHistogramTest.java index c3826d1..bbfd1c7 100644 --- a/test/unit/org/apache/cassandra/utils/EstimatedHistogramTest.java +++ b/test/unit/org/apache/cassandra/utils/EstimatedHistogramTest.java @@ -71,4 +71,21 @@ public class EstimatedHistogramTest assertEquals(2, histogram.getBuckets(false)[13]); assertEquals(5021848, histogram.mean()); } + + @Test + public void testPercentile() + { + EstimatedHistogram histogram = new EstimatedHistogram(); + // percentile of empty histogram is 0 + assertEquals(0, histogram.percentile(0.99)); + + histogram.add(1); + // percentile of histogram with just one value will return 0 except 100th + assertEquals(0, histogram.percentile(0.99)); + assertEquals(1, histogram.percentile(1.00)); + + histogram.add(10); + assertEquals(1, histogram.percentile(0.99)); + assertEquals(10, histogram.percentile(1.00)); + } }