Merge branch 'cassandra-2.2' into cassandra-3.0
Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/7a274dd3
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/7a274dd3
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/7a274dd3

Branch: refs/heads/cassandra-3.0
Commit: 7a274dd36cd6e6bd5c1c5e8d7645945ba74bfa84
Parents: 76f1750 7232d72
Author: Marcus Eriksson <marc...@apache.org>
Authored: Thu Oct 13 15:02:19 2016 +0200
Committer: Marcus Eriksson <marc...@apache.org>
Committed: Thu Oct 13 15:03:00 2016 +0200

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../apache/cassandra/db/ColumnFamilyStore.java  |  7 ++++-
 .../cassandra/db/ColumnFamilyMetricTest.java    | 29 ++++++++++++++++++++
 3 files changed, 36 insertions(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/cassandra/blob/7a274dd3/CHANGES.txt
----------------------------------------------------------------------
diff --cc CHANGES.txt
index 13800da,682f12b..dc7bcab
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@@ -1,21 -1,5 +1,22 @@@
-2.2.9
+3.0.10
+ * Split materialized view mutations on build to prevent OOM (CASSANDRA-12268)
+ * mx4j does not work in 3.0.8 (CASSANDRA-12274)
+ * Abort cqlsh copy-from in case of no answer after prolonged period of time (CASSANDRA-12740)
+ * Avoid sstable corrupt exception due to dropped static column (CASSANDRA-12582)
+ * Make stress use client mode to avoid checking commit log size on startup (CASSANDRA-12478)
+ * Fix exceptions with new vnode allocation (CASSANDRA-12715)
+ * Unify drain and shutdown processes (CASSANDRA-12509)
+ * Fix NPE in ComponentOfSlice.isEQ() (CASSANDRA-12706)
+ * Fix failure in LogTransactionTest (CASSANDRA-12632)
+ * Fix potentially incomplete non-frozen UDT values when querying with the
+   full primary key specified (CASSANDRA-12605)
+ * Skip writing MV mutations to commitlog on mutation.applyUnsafe() (CASSANDRA-11670)
+ * Establish consistent distinction between non-existing partition and NULL value for LWTs on static columns (CASSANDRA-12060)
+ * Extend ColumnIdentifier.internedInstances key to include the type that generated the byte buffer (CASSANDRA-12516)
+ * Backport CASSANDRA-10756 (race condition in NativeTransportService shutdown) (CASSANDRA-12472)
+ * If CF has no clustering columns, any row cache is full partition cache (CASSANDRA-12499)
+Merged from 2.2:
+ * Limit colUpdateTimeDelta histogram updates to reasonable deltas (CASSANDRA-11117)
  * Fix leak errors and execution rejected exceptions when draining (CASSANDRA-12457)
  * Fix merkle tree depth calculation (CASSANDRA-12580)
  * Make Collections deserialization more robust (CASSANDRA-12618)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/7a274dd3/src/java/org/apache/cassandra/db/ColumnFamilyStore.java
----------------------------------------------------------------------
diff --cc src/java/org/apache/cassandra/db/ColumnFamilyStore.java
index fb3665b,c6b69dc..adfd1af
--- a/src/java/org/apache/cassandra/db/ColumnFamilyStore.java
+++ b/src/java/org/apache/cassandra/db/ColumnFamilyStore.java
@@@ -1184,23 -1343,104 +1184,28 @@@ public class ColumnFamilyStore implemen
      {
          long start = System.nanoTime();
          Memtable mt = data.getMemtableFor(opGroup, replayPosition);
-         final long timeDelta = mt.put(key, columnFamily, indexer, opGroup);
-         maybeUpdateRowCache(key);
-         metric.samplers.get(Sampler.WRITES).addSample(key.getKey(), key.hashCode(), 1);
-         metric.writeLatency.addNano(System.nanoTime() - start);
-         // CASSANDRA-11117 - certain resolution paths on memtable put can result in very
-         // large time deltas, either through a variety of sentinel timestamps (used for empty values, ensuring
-         // a minimal write, etc). This limits the time delta to the max value the histogram
-         // can bucket correctly. This also filters the Long.MAX_VALUE case where there was no previous value
-         // to update.
-         if(timeDelta < Long.MAX_VALUE)
-             metric.colUpdateTimeDeltaHistogram.update(Math.min(18165375903306L, timeDelta));
-     }
-
-     /**
-      * Purges gc-able top-level and range tombstones, returning `cf` if there are any columns or tombstones left,
-      * null otherwise.
-      * @param gcBefore a timestamp (in seconds); tombstones with a localDeletionTime before this will be purged
-      */
-     public static ColumnFamily removeDeletedCF(ColumnFamily cf, int gcBefore)
-     {
-         // purge old top-level and range tombstones
-         cf.purgeTombstones(gcBefore);
-
-         // if there are no columns or tombstones left, return null
-         return !cf.hasColumns() && !cf.isMarkedForDelete() ? null : cf;
-     }
-
-     /**
-      * Removes deleted columns and purges gc-able tombstones.
-      * @return an updated `cf` if any columns or tombstones remain, null otherwise
-      */
-     public static ColumnFamily removeDeleted(ColumnFamily cf, int gcBefore)
-     {
-         return removeDeleted(cf, gcBefore, SecondaryIndexManager.nullUpdater);
-     }
-
-     /*
-      This is complicated because we need to preserve deleted columns and columnfamilies
-      until they have been deleted for at least GC_GRACE_IN_SECONDS. But, we do not need to preserve
-      their contents; just the object itself as a "tombstone" that can be used to repair other
-      replicas that do not know about the deletion.
-      */
-     public static ColumnFamily removeDeleted(ColumnFamily cf, int gcBefore, SecondaryIndexManager.Updater indexer)
-     {
-         if (cf == null)
+         try
          {
-             return null;
+             long timeDelta = mt.put(update, indexer, opGroup);
+             DecoratedKey key = update.partitionKey();
+             maybeUpdateRowCache(key);
+             metric.samplers.get(Sampler.WRITES).addSample(key.getKey(), key.hashCode(), 1);
+             metric.writeLatency.addNano(System.nanoTime() - start);
++            // CASSANDRA-11117 - certain resolution paths on memtable put can result in very
++            // large time deltas, either through a variety of sentinel timestamps (used for empty values, ensuring
++            // a minimal write, etc). This limits the time delta to the max value the histogram
++            // can bucket correctly. This also filters the Long.MAX_VALUE case where there was no previous value
++            // to update.
+             if(timeDelta < Long.MAX_VALUE)
-                 metric.colUpdateTimeDeltaHistogram.update(timeDelta);
++                metric.colUpdateTimeDeltaHistogram.update(Math.min(18165375903306L, timeDelta));
          }
-
-         return removeDeletedCF(removeDeletedColumnsOnly(cf, gcBefore, indexer), gcBefore);
-     }
-
-     /**
-      * Removes only per-cell tombstones, cells that are shadowed by a row-level or range tombstone, or
-      * columns that have been dropped from the schema (for CQL3 tables only).
-      * @return the updated ColumnFamily
-      */
-     public static ColumnFamily removeDeletedColumnsOnly(ColumnFamily cf, int gcBefore, SecondaryIndexManager.Updater indexer)
-     {
-         BatchRemoveIterator<Cell> iter = cf.batchRemoveIterator();
-         DeletionInfo.InOrderTester tester = cf.inOrderDeletionTester();
-         boolean hasDroppedColumns = !cf.metadata.getDroppedColumns().isEmpty();
-         while (iter.hasNext())
-         {
-             Cell c = iter.next();
-             // remove columns if
-             // (a) the column itself is gcable or
-             // (b) the column is shadowed by a CF tombstone
-             // (c) the column has been dropped from the CF schema (CQL3 tables only)
-             if (c.getLocalDeletionTime() < gcBefore || tester.isDeleted(c) || (hasDroppedColumns && isDroppedColumn(c, cf.metadata())))
-             {
-                 iter.remove();
-                 indexer.remove(c);
-             }
+         catch (RuntimeException e)
+         {
+             throw new RuntimeException(e.getMessage()
+                                        + " for ks: "
+                                        + keyspace.getName() + ", table: " + name, e);
          }
-         iter.commit();
-         return cf;
-     }
-
-     // returns true if
-     // 1. this column has been dropped from schema and
-     // 2. if it has been re-added since then, this particular column was inserted before the last drop
-     private static boolean isDroppedColumn(Cell c, CFMetaData meta)
-     {
-         Long droppedAt = meta.getDroppedColumns().get(c.name().cql3ColumnName(meta));
-         return droppedAt != null && c.timestamp() <= droppedAt;
-     }
-
-     private void removeDroppedColumns(ColumnFamily cf)
-     {
-         if (cf == null || cf.metadata.getDroppedColumns().isEmpty())
-             return;
-         BatchRemoveIterator<Cell> iter = cf.batchRemoveIterator();
-         while (iter.hasNext())
-             if (isDroppedColumn(iter.next(), metadata))
-                 iter.remove();
-         iter.commit();
      }

      /**

http://git-wip-us.apache.org/repos/asf/cassandra/blob/7a274dd3/test/unit/org/apache/cassandra/db/ColumnFamilyMetricTest.java
----------------------------------------------------------------------
diff --cc test/unit/org/apache/cassandra/db/ColumnFamilyMetricTest.java
index bee747e,2d89e09..2f7aaa5
--- a/test/unit/org/apache/cassandra/db/ColumnFamilyMetricTest.java
+++ b/test/unit/org/apache/cassandra/db/ColumnFamilyMetricTest.java
@@@ -70,15 -74,59 +70,44 @@@ public class ColumnFamilyMetricTes
          }

          // size metrics should show the sum of all SSTable sizes
-         assertEquals(size, store.metric.liveDiskSpaceUsed.getCount());
-         assertEquals(size, store.metric.totalDiskSpaceUsed.getCount());
+         assertEquals(size, cfs.metric.liveDiskSpaceUsed.getCount());
+         assertEquals(size, cfs.metric.totalDiskSpaceUsed.getCount());

-         store.truncateBlocking();
+         cfs.truncateBlocking();

          // after truncate, size metrics should be down to 0
-         Util.spinAssertEquals(
-             0L,
-             new Supplier<Object>()
-             {
-                 public Long get()
-                 {
-                     return store.metric.liveDiskSpaceUsed.getCount();
-                 }
-             },
-             30);
-         Util.spinAssertEquals(
-             0L,
-             new Supplier<Object>()
-             {
-                 public Long get()
-                 {
-                     return store.metric.totalDiskSpaceUsed.getCount();
-                 }
-             },
-             30);
-
-         store.enableAutoCompaction();
+         Util.spinAssertEquals(0L, () -> cfs.metric.liveDiskSpaceUsed.getCount(), 30);
+         Util.spinAssertEquals(0L, () -> cfs.metric.totalDiskSpaceUsed.getCount(), 30);
+
+         cfs.enableAutoCompaction();
      }
+
+     @Test
+     public void testColUpdateTimeDeltaFiltering()
+     {
+         Keyspace keyspace = Keyspace.open("Keyspace1");
+         ColumnFamilyStore store = keyspace.getColumnFamilyStore("Standard2");
+
+         // This confirms another test/set up did not overflow the histogram
+         store.metric.colUpdateTimeDeltaHistogram.cf.getSnapshot().get999thPercentile();
+
-         ByteBuffer key = ByteBufferUtil.bytes(4242);
-         Mutation m = new Mutation("Keyspace1", key);
-         m.add("Standard2", cellname("0"), ByteBufferUtil.bytes("0"), 0);
-         m.apply();
++        new RowUpdateBuilder(store.metadata, 0, "4242")
++                .clustering("0")
++                .add("val", ByteBufferUtil.bytes("0"))
++                .build()
++                .applyUnsafe();
+
+         // The histogram should not have overflowed on the first write
+         store.metric.colUpdateTimeDeltaHistogram.cf.getSnapshot().get999thPercentile();
+
-         m = new Mutation("Keyspace1", key);
+         // smallest time delta that would overflow the histogram if unfiltered
-         m.add("Standard2", cellname("0"), ByteBufferUtil.bytes("1"), 18165375903307L);
-         m.apply();
++        new RowUpdateBuilder(store.metadata, 18165375903307L, "4242")
++                .clustering("0")
++                .add("val", ByteBufferUtil.bytes("0"))
++                .build()
++                .applyUnsafe();
+
+         // CASSANDRA-11117 - update with large timestamp delta should not overflow the histogram
+         store.metric.colUpdateTimeDeltaHistogram.cf.getSnapshot().get999thPercentile();
+     }
  }
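
Note on the change being merged: CASSANDRA-11117 caps the value fed into colUpdateTimeDeltaHistogram, because certain memtable put paths produce sentinel timestamps whose deltas exceed what the histogram can bucket, and it skips Long.MAX_VALUE entirely (the "no previous value" case). The snippet below is a minimal, self-contained sketch of that clamp-or-skip idea only; it is not Cassandra code, and DeltaRecorder / MAX_BUCKETABLE_DELTA are hypothetical names (the 18165375903306L cap is the one used in the patch above).

    import java.util.function.LongConsumer;

    // Sketch: clamp column-update time deltas before recording them, so that sentinel
    // timestamps cannot overflow the histogram's largest bucket (per CASSANDRA-11117).
    public final class DeltaRecorder
    {
        // Largest delta the backing histogram is assumed to bucket without overflowing.
        static final long MAX_BUCKETABLE_DELTA = 18165375903306L;

        private final LongConsumer histogram;

        public DeltaRecorder(LongConsumer histogram)
        {
            this.histogram = histogram;
        }

        public void record(long timeDelta)
        {
            // Long.MAX_VALUE means "no previous value to compare against": record nothing.
            if (timeDelta < Long.MAX_VALUE)
                histogram.accept(Math.min(MAX_BUCKETABLE_DELTA, timeDelta));
        }

        public static void main(String[] args)
        {
            DeltaRecorder recorder = new DeltaRecorder(v -> System.out.println("recorded delta: " + v));
            recorder.record(42L);             // small delta is recorded unchanged
            recorder.record(18165375903307L); // just over the cap: clamped to MAX_BUCKETABLE_DELTA
            recorder.record(Long.MAX_VALUE);  // sentinel: filtered out, nothing recorded
        }
    }

This is the same shape the new test exercises: one write with a small timestamp delta and one with 18165375903307L, after which the histogram's 99.9th percentile can still be read without overflowing.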