Use keyspace replication settings on system.size_estimates table. Patch by Chris Lohfink; reviewed by Paulo Motta for CASSANDRA-9639.
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/af7b20bd Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/af7b20bd Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/af7b20bd Branch: refs/heads/cassandra-3.11 Commit: af7b20bd0ea0d1e80553c519510c9ad9f29af64a Parents: 76ad028 Author: Chris Lohfink <clohfin...@gmail.com> Authored: Thu Jan 26 09:43:47 2017 -0600 Committer: Paulo Motta <pa...@apache.org> Committed: Wed Feb 15 10:27:25 2017 -0200 ---------------------------------------------------------------------- CHANGES.txt | 1 + NEWS.txt | 2 + .../cassandra/db/SizeEstimatesRecorder.java | 54 ++++++++++---------- 3 files changed, 30 insertions(+), 27 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/af7b20bd/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index b19550a..732e14b 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 3.0.11 + * Use keyspace replication settings on system.size_estimates table (CASSANDRA-9639) * Add vm.max_map_count StartupCheck (CASSANDRA-13008) * Hint related logging should include the IP address of the destination in addition to host ID (CASSANDRA-13205) http://git-wip-us.apache.org/repos/asf/cassandra/blob/af7b20bd/NEWS.txt ---------------------------------------------------------------------- diff --git a/NEWS.txt b/NEWS.txt index 4248a6e..a5ee496 100644 --- a/NEWS.txt +++ b/NEWS.txt @@ -30,6 +30,8 @@ Upgrading - Compaction now correctly drops sstables out of CompactionTask when there isn't enough disk space to perform the full compaction. This should reduce pending compaction tasks on systems with little remaining disk space. 
+ - Primary ranges in the system.size_estimates table are now based on the keyspace + replication settings and adjacent ranges are no longer merged (CASSANDRA-9639). 3.0.10 ===== http://git-wip-us.apache.org/repos/asf/cassandra/blob/af7b20bd/src/java/org/apache/cassandra/db/SizeEstimatesRecorder.java ---------------------------------------------------------------------- diff --git a/src/java/org/apache/cassandra/db/SizeEstimatesRecorder.java b/src/java/org/apache/cassandra/db/SizeEstimatesRecorder.java index 0b31b87..ebe3f9a 100644 --- a/src/java/org/apache/cassandra/db/SizeEstimatesRecorder.java +++ b/src/java/org/apache/cassandra/db/SizeEstimatesRecorder.java @@ -69,12 +69,10 @@ public class SizeEstimatesRecorder extends MigrationListener implements Runnable logger.trace("Recording size estimates"); - // find primary token ranges for the local node. - Collection<Token> localTokens = StorageService.instance.getLocalTokens(); - Collection<Range<Token>> localRanges = metadata.getPrimaryRangesFor(localTokens); - for (Keyspace keyspace : Keyspace.nonLocalStrategy()) { + Collection<Range<Token>> localRanges = StorageService.instance.getPrimaryRangesForEndpoint(keyspace.getName(), + FBUtilities.getBroadcastAddress()); for (ColumnFamilyStore table : keyspace.getColumnFamilyStores()) { long start = System.nanoTime(); @@ -91,37 +89,39 @@ public class SizeEstimatesRecorder extends MigrationListener implements Runnable @SuppressWarnings("resource") private void recordSizeEstimates(ColumnFamilyStore table, Collection<Range<Token>> localRanges) { - List<Range<Token>> unwrappedRanges = Range.normalize(localRanges); // for each local primary range, estimate (crudely) mean partition size and partitions count. Map<Range<Token>, Pair<Long, Long>> estimates = new HashMap<>(localRanges.size()); - for (Range<Token> range : unwrappedRanges) + for (Range<Token> localRange : localRanges) { - // filter sstables that have partitions in this range. 
- Refs<SSTableReader> refs = null; - long partitionsCount, meanPartitionSize; - - try + for (Range<Token> unwrappedRange : localRange.unwrap()) { - while (refs == null) + // filter sstables that have partitions in this range. + Refs<SSTableReader> refs = null; + long partitionsCount, meanPartitionSize; + + try + { + while (refs == null) + { + Iterable<SSTableReader> sstables = table.getTracker().getView().select(SSTableSet.CANONICAL); + SSTableIntervalTree tree = SSTableIntervalTree.build(sstables); + Range<PartitionPosition> r = Range.makeRowRange(unwrappedRange); + Iterable<SSTableReader> canonicalSSTables = View.sstablesInBounds(r.left, r.right, tree); + refs = Refs.tryRef(canonicalSSTables); + } + + // calculate the estimates. + partitionsCount = estimatePartitionsCount(refs, unwrappedRange); + meanPartitionSize = estimateMeanPartitionSize(refs); + } + finally { - Iterable<SSTableReader> sstables = table.getTracker().getView().select(SSTableSet.CANONICAL); - SSTableIntervalTree tree = SSTableIntervalTree.build(sstables); - Range<PartitionPosition> r = Range.makeRowRange(range); - Iterable<SSTableReader> canonicalSSTables = View.sstablesInBounds(r.left, r.right, tree); - refs = Refs.tryRef(canonicalSSTables); + if (refs != null) + refs.release(); } - // calculate the estimates. - partitionsCount = estimatePartitionsCount(refs, range); - meanPartitionSize = estimateMeanPartitionSize(refs); - } - finally - { - if (refs != null) - refs.release(); + estimates.put(unwrappedRange, Pair.create(partitionsCount, meanPartitionSize)); } - - estimates.put(range, Pair.create(partitionsCount, meanPartitionSize)); } // atomically update the estimates.