Merge branch 'cassandra-3.11' into trunk
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/10d5b7b2 Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/10d5b7b2 Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/10d5b7b2 Branch: refs/heads/trunk Commit: 10d5b7b2f77fb7c25e288f42f7fb64b3131fad35 Parents: c8d15f0 14d67d8 Author: Mick Semb Wever <m...@apache.org> Authored: Tue Sep 5 08:36:12 2017 +1000 Committer: Mick Semb Wever <m...@apache.org> Committed: Tue Sep 5 08:38:48 2017 +1000 ---------------------------------------------------------------------- doc/cql3/CQL.textile | 36 +++++----- doc/source/operating/compaction.rst | 8 ++- .../db/compaction/CompactionController.java | 67 ++++++++++++++++-- .../TimeWindowCompactionController.java | 49 +++++++++++++ .../TimeWindowCompactionStrategy.java | 10 +-- .../TimeWindowCompactionStrategyOptions.java | 22 ++++++ .../db/compaction/TimeWindowCompactionTask.java | 42 +++++++++++ .../db/compaction/CompactionControllerTest.java | 5 ++ .../TimeWindowCompactionStrategyTest.java | 74 +++++++++++++++++++- 9 files changed, 281 insertions(+), 32 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/10d5b7b2/doc/cql3/CQL.textile ---------------------------------------------------------------------- diff --cc doc/cql3/CQL.textile index 88d6694,f2f9bd8..db1ec22 --- a/doc/cql3/CQL.textile +++ b/doc/cql3/CQL.textile @@@ -347,24 -347,24 +347,24 @@@ h4(#compactionOptions). Compaction opti The @compaction@ property must at least define the @'class'@ sub-option, that defines the compaction strategy class to use. The default supported class are @'SizeTieredCompactionStrategy'@, @'LeveledCompactionStrategy'@, @'DateTieredCompactionStrategy'@ and @'TimeWindowCompactionStrategy'@. Custom strategy can be provided by specifying the full class name as a "string constant":#constants. 
The rest of the sub-options depends on the chosen class. The sub-options supported by the default classes are: --|_. option |_. supported compaction strategy |_. default |_. description | --| @enabled@ | _all_ | true | A boolean denoting whether compaction should be enabled or not.| --| @tombstone_threshold@ | _all_ | 0.2 | A ratio such that if a sstable has more than this ratio of gcable tombstones over all contained columns, the sstable will be compacted (with no other sstables) for the purpose of purging those tombstones. | --| @tombstone_compaction_interval@ | _all_ | 1 day | The minimum time to wait after an sstable creation time before considering it for "tombstone compaction", where "tombstone compaction" is the compaction triggered if the sstable has more gcable tombstones than @tombstone_threshold@. | --| @unchecked_tombstone_compaction@ | _all_ | false | Setting this to true enables more aggressive tombstone compactions - single sstable tombstone compactions will run without checking how likely it is that they will be successful. | --| @min_sstable_size@ | SizeTieredCompactionStrategy | 50MB | The size tiered strategy groups SSTables to compact in buckets. A bucket groups SSTables that differs from less than 50% in size. However, for small sizes, this would result in a bucketing that is too fine grained. 
@min_sstable_size@ defines a size threshold (in bytes) below which all SSTables belong to one unique bucket| --| @min_threshold@ | SizeTieredCompactionStrategy | 4 | Minimum number of SSTables needed to start a minor compaction.| --| @max_threshold@ | SizeTieredCompactionStrategy | 32 | Maximum number of SSTables processed by one minor compaction.| --| @bucket_low@ | SizeTieredCompactionStrategy | 0.5 | Size tiered consider sstables to be within the same bucket if their size is within [average_size * @bucket_low@, average_size * @bucket_high@ ] (i.e the default groups sstable whose sizes diverges by at most 50%)| --| @bucket_high@ | SizeTieredCompactionStrategy | 1.5 | Size tiered consider sstables to be within the same bucket if their size is within [average_size * @bucket_low@, average_size * @bucket_high@ ] (i.e the default groups sstable whose sizes diverges by at most 50%).| --| @sstable_size_in_mb@ | LeveledCompactionStrategy | 5MB | The target size (in MB) for sstables in the leveled strategy. Note that while sstable sizes should stay less or equal to @sstable_size_in_mb@, it is possible to exceptionally have a larger sstable as during compaction, data for a given partition key are never split into 2 sstables| --| @timestamp_resolution@ | DateTieredCompactionStrategy | MICROSECONDS | The timestamp resolution used when inserting data, could be MILLISECONDS, MICROSECONDS etc (should be understandable by Java TimeUnit) - don't change this unless you do mutations with USING TIMESTAMP <non_microsecond_timestamps> (or equivalent directly in the client)| --| @base_time_seconds@ | DateTieredCompactionStrategy | 60 | The base size of the time windows. | --| @max_sstable_age_days@ | DateTieredCompactionStrategy | 365 | SSTables only containing data that is older than this will never be compacted. 
| --| @timestamp_resolution@ | TimeWindowCompactionStrategy | MICROSECONDS | The timestamp resolution used when inserting data, could be MILLISECONDS, MICROSECONDS etc (should be understandable by Java TimeUnit) - don't change this unless you do mutations with USING TIMESTAMP <non_microsecond_timestamps> (or equivalent directly in the client)| --| @compaction_window_unit@ | TimeWindowCompactionStrategy | DAYS | The Java TimeUnit used for the window size, set in conjunction with @compaction_window_size@. Must be one of DAYS, HOURS, MINUTES | --| @compaction_window_size@ | TimeWindowCompactionStrategy | 1 | The number of @compaction_window_unit@ units that make up a time window. | -- ++|_. option |_. supported compaction strategy |_. default |_. description | ++| @enabled@ | _all_ | true | A boolean denoting whether compaction should be enabled or not.| ++| @tombstone_threshold@ | _all_ | 0.2 | A ratio such that if a sstable has more than this ratio of gcable tombstones over all contained columns, the sstable will be compacted (with no other sstables) for the purpose of purging those tombstones. | ++| @tombstone_compaction_interval@ | _all_ | 1 day | The minimum time to wait after an sstable creation time before considering it for "tombstone compaction", where "tombstone compaction" is the compaction triggered if the sstable has more gcable tombstones than @tombstone_threshold@. | ++| @unchecked_tombstone_compaction@ | _all_ | false | Setting this to true enables more aggressive tombstone compactions - single sstable tombstone compactions will run without checking how likely it is that they will be successful. | ++| @min_sstable_size@ | SizeTieredCompactionStrategy | 50MB | The size tiered strategy groups SSTables to compact in buckets. A bucket groups SSTables that differs from less than 50% in size. However, for small sizes, this would result in a bucketing that is too fine grained. 
@min_sstable_size@ defines a size threshold (in bytes) below which all SSTables belong to one unique bucket| ++| @min_threshold@ | SizeTieredCompactionStrategy | 4 | Minimum number of SSTables needed to start a minor compaction.| ++| @max_threshold@ | SizeTieredCompactionStrategy | 32 | Maximum number of SSTables processed by one minor compaction.| ++| @bucket_low@ | SizeTieredCompactionStrategy | 0.5 | Size tiered consider sstables to be within the same bucket if their size is within [average_size * @bucket_low@, average_size * @bucket_high@ ] (i.e the default groups sstable whose sizes diverges by at most 50%)| ++| @bucket_high@ | SizeTieredCompactionStrategy | 1.5 | Size tiered consider sstables to be within the same bucket if their size is within [average_size * @bucket_low@, average_size * @bucket_high@ ] (i.e the default groups sstable whose sizes diverges by at most 50%).| ++| @sstable_size_in_mb@ | LeveledCompactionStrategy | 5MB | The target size (in MB) for sstables in the leveled strategy. Note that while sstable sizes should stay less or equal to @sstable_size_in_mb@, it is possible to exceptionally have a larger sstable as during compaction, data for a given partition key are never split into 2 sstables| ++| @timestamp_resolution@ | DateTieredCompactionStrategy | MICROSECONDS | The timestamp resolution used when inserting data, could be MILLISECONDS, MICROSECONDS etc (should be understandable by Java TimeUnit) - don't change this unless you do mutations with USING TIMESTAMP <non_microsecond_timestamps> (or equivalent directly in the client)| ++| @base_time_seconds@ | DateTieredCompactionStrategy | 60 | The base size of the time windows. | ++| @max_sstable_age_days@ | DateTieredCompactionStrategy | 365 | SSTables only containing data that is older than this will never be compacted. 
| ++| @timestamp_resolution@ | TimeWindowCompactionStrategy | MICROSECONDS | The timestamp resolution used when inserting data, could be MILLISECONDS, MICROSECONDS etc (should be understandable by Java TimeUnit) - don't change this unless you do mutations with USING TIMESTAMP <non_microsecond_timestamps> (or equivalent directly in the client)| ++| @compaction_window_unit@ | TimeWindowCompactionStrategy | DAYS | The Java TimeUnit used for the window size, set in conjunction with @compaction_window_size@. Must be one of DAYS, HOURS, MINUTES | ++| @compaction_window_size@ | TimeWindowCompactionStrategy | 1 | The number of @compaction_window_unit@ units that make up a time window. | ++| @unsafe_aggressive_sstable_expiration@ | TimeWindowCompactionStrategy | false | Expired sstables will be dropped without checking whether their data is shadowing other sstables. This is a potentially risky option that can lead to data loss or deleted data re-appearing, going beyond what `unchecked_tombstone_compaction` does for single sstable compaction. Due to the risk, the JVM must also be started with `-Dcassandra.unsafe_aggressive_sstable_expiration=true`. | h4(#compressionOptions). Compression options http://git-wip-us.apache.org/repos/asf/cassandra/blob/10d5b7b2/doc/source/operating/compaction.rst ---------------------------------------------------------------------- diff --cc doc/source/operating/compaction.rst index 0f39000,0f39000..cb6be45 --- a/doc/source/operating/compaction.rst +++ b/doc/source/operating/compaction.rst @@@ -160,7 -160,7 +160,8 @@@ compaction can drop that sstable. If yo tombstones once the time to live has expired) but it is not being dropped by compaction, it is likely that other sstables contain older data. There is a tool called ``sstableexpiredblockers`` that will list which sstables are droppable and which are blocking them from being dropped. 
This is especially useful for time series compaction with --``TimeWindowCompactionStrategy`` (and the deprecated ``DateTieredCompactionStrategy``). ++``TimeWindowCompactionStrategy`` (and the deprecated ``DateTieredCompactionStrategy``). With ``TimeWindowCompactionStrategy`` ++it is possible to remove the guarantee (not check for shadowing data) by enabling ``unsafe_aggressive_sstable_expiration``. Repaired/unrepaired data ^^^^^^^^^^^^^^^^^^^^^^^^ @@@ -403,6 -403,6 +404,11 @@@ as the combination of two primary optio A Java TimeUnit (MINUTES, HOURS, or DAYS). ``compaction_window_size`` (default: 1) The number of units that make up a window. ++``unsafe_aggressive_sstable_expiration`` (default: false) ++ Expired sstables will be dropped without checking whether their data is shadowing other sstables. This is a potentially ++ risky option that can lead to data loss or deleted data re-appearing, going beyond what ++ `unchecked_tombstone_compaction` does for single sstable compaction. Due to the risk, the JVM must also be ++ started with `-Dcassandra.unsafe_aggressive_sstable_expiration=true`. Taken together, the operator can specify windows of virtually any size, and `TimeWindowCompactionStrategy` will work to create a single sstable for writes within that window. 
For efficiency during writing, the newest window will be http://git-wip-us.apache.org/repos/asf/cassandra/blob/10d5b7b2/src/java/org/apache/cassandra/db/compaction/TimeWindowCompactionStrategy.java ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cassandra/blob/10d5b7b2/test/unit/org/apache/cassandra/db/compaction/CompactionControllerTest.java ---------------------------------------------------------------------- diff --cc test/unit/org/apache/cassandra/db/compaction/CompactionControllerTest.java index 14dc3be,052206e..40693fa --- a/test/unit/org/apache/cassandra/db/compaction/CompactionControllerTest.java +++ b/test/unit/org/apache/cassandra/db/compaction/CompactionControllerTest.java @@@ -179,9 -177,14 +179,14 @@@ public class CompactionControllerTest e expired = CompactionController.getFullyExpiredSSTables(cfs, compacting, overlapping, gcBefore); assertNotNull(expired); assertEquals(0, expired.size()); + + // Now if we explicitly ask to ignore overlaped sstables, we should get back our expired sstable + expired = CompactionController.getFullyExpiredSSTables(cfs, compacting, overlapping, gcBefore, true); + assertNotNull(expired); + assertEquals(1, expired.size()); } - private void applyMutation(CFMetaData cfm, DecoratedKey key, long timestamp) + private void applyMutation(TableMetadata cfm, DecoratedKey key, long timestamp) { ByteBuffer val = ByteBufferUtil.bytes(1L); http://git-wip-us.apache.org/repos/asf/cassandra/blob/10d5b7b2/test/unit/org/apache/cassandra/db/compaction/TimeWindowCompactionStrategyTest.java ---------------------------------------------------------------------- diff --cc test/unit/org/apache/cassandra/db/compaction/TimeWindowCompactionStrategyTest.java index 0df546d,6fff279..89dd2f5 --- a/test/unit/org/apache/cassandra/db/compaction/TimeWindowCompactionStrategyTest.java +++ b/test/unit/org/apache/cassandra/db/compaction/TimeWindowCompactionStrategyTest.java @@@ -272,4 -284,64 +284,64 
@@@ public class TimeWindowCompactionStrate t.transaction.abort(); } + @Test + public void testDropOverlappingExpiredSSTables() throws InterruptedException + { + Keyspace keyspace = Keyspace.open(KEYSPACE1); + ColumnFamilyStore cfs = keyspace.getColumnFamilyStore(CF_STANDARD1); + cfs.truncateBlocking(); + cfs.disableAutoCompaction(); + + ByteBuffer value = ByteBuffer.wrap(new byte[100]); + + // create 2 sstables + DecoratedKey key = Util.dk(String.valueOf("expired")); - new RowUpdateBuilder(cfs.metadata, System.currentTimeMillis(), 1, key.getKey()) ++ new RowUpdateBuilder(cfs.metadata(), System.currentTimeMillis(), 1, key.getKey()) + .clustering("column") + .add("val", value).build().applyUnsafe(); + + cfs.forceBlockingFlush(); + SSTableReader expiredSSTable = cfs.getLiveSSTables().iterator().next(); + Thread.sleep(10); + - new RowUpdateBuilder(cfs.metadata, System.currentTimeMillis() - 1000, key.getKey()) ++ new RowUpdateBuilder(cfs.metadata(), System.currentTimeMillis() - 1000, key.getKey()) + .clustering("column") + .add("val", value).build().applyUnsafe(); + key = Util.dk(String.valueOf("nonexpired")); - new RowUpdateBuilder(cfs.metadata, System.currentTimeMillis(), key.getKey()) ++ new RowUpdateBuilder(cfs.metadata(), System.currentTimeMillis(), key.getKey()) + .clustering("column") + .add("val", value).build().applyUnsafe(); + + cfs.forceBlockingFlush(); + assertEquals(cfs.getLiveSSTables().size(), 2); + + Map<String, String> options = new HashMap<>(); + + options.put(TimeWindowCompactionStrategyOptions.COMPACTION_WINDOW_SIZE_KEY, "30"); + options.put(TimeWindowCompactionStrategyOptions.COMPACTION_WINDOW_UNIT_KEY, "SECONDS"); + options.put(TimeWindowCompactionStrategyOptions.TIMESTAMP_RESOLUTION_KEY, "MILLISECONDS"); + options.put(TimeWindowCompactionStrategyOptions.EXPIRED_SSTABLE_CHECK_FREQUENCY_SECONDS_KEY, "0"); + TimeWindowCompactionStrategy twcs = new TimeWindowCompactionStrategy(cfs, options); + for (SSTableReader sstable : cfs.getLiveSSTables()) + 
twcs.addSSTable(sstable); + + twcs.startup(); + assertNull(twcs.getNextBackgroundTask((int) (System.currentTimeMillis() / 1000))); + Thread.sleep(2000); + assertNull(twcs.getNextBackgroundTask((int) (System.currentTimeMillis()/1000))); + + options.put(TimeWindowCompactionStrategyOptions.UNSAFE_AGGRESSIVE_SSTABLE_EXPIRATION_KEY, "true"); + twcs = new TimeWindowCompactionStrategy(cfs, options); + for (SSTableReader sstable : cfs.getLiveSSTables()) + twcs.addSSTable(sstable); + + twcs.startup(); + AbstractCompactionTask t = twcs.getNextBackgroundTask((int) (System.currentTimeMillis()/1000)); + assertNotNull(t); + assertEquals(1, Iterables.size(t.transaction.originals())); + SSTableReader sstable = t.transaction.originals().iterator().next(); + assertEquals(sstable, expiredSSTable); + twcs.shutdown(); + t.transaction.abort(); + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org For additional commands, e-mail: commits-h...@cassandra.apache.org