This is an automated email from the ASF dual-hosted git repository. mck pushed a commit to branch trunk in repository https://gitbox.apache.org/repos/asf/cassandra.git
commit 1596c37d03d9a6be4b72b1eee357f068ef650159 Merge: 7637acc 3e24c03 Author: Mick Semb Wever <m...@apache.org> AuthorDate: Thu Jan 7 23:58:05 2021 +0100 Merge branch 'cassandra-3.11' into trunk CHANGES.txt | 5 +- doc/source/cql/appendices.rst | 9 +- .../cassandra/db/SinglePartitionReadCommand.java | 35 ++- .../miscellaneous/SSTablesIteratedTest.java | 238 ++++++++++++++++++++- .../cql3/validation/operations/UpdateTest.java | 15 +- 5 files changed, 279 insertions(+), 23 deletions(-) diff --cc CHANGES.txt index 10e7a43,35129a2..46c1d13 --- a/CHANGES.txt +++ b/CHANGES.txt @@@ -1,41 -1,9 +1,44 @@@ -3.11.10 +4.0-beta5 ++ * SSLFactory should initialize SSLContext before setting protocols (CASSANDRA-16362) + * Restore sasi dependencies jflex, snowball-stemmer, and concurrent-trees, in the cassandra-all pom (CASSANDRA-16303) * Fix DecimalDeserializer#toString OOM (CASSANDRA-14925) - * SSLFactory should initialize SSLContext before setting protocols (CASSANDRA-16362) - * Rate limit validation compactions using compaction_throughput_mb_per_sec (CASSANDRA-16161) - * SASI's `max_compaction_flush_memory_in_mb` settings over 100GB revert to default of 1GB (CASSANDRA-16071) ++Merged from 3.11: + Merged from 3.0: + * Fix skipping on pre-3.0 created compact storage sstables due to missing primary key liveness (CASSANDRA-16226) + +4.0-beta4 + * DROP COMPACT STORAGE should invalidate prepared statements still using CompactTableMetadata (CASSANDRA-16361) + * Update default num_tokens to 16 and allocate_tokens_for_local_replication_factor to 3 (CASSANDRA-13701) + * Remove use of String.intern() (CASSANDRA-15810) + * Fix the missing bb position in ByteBufferAccessor.getUnsignedShort (CASSANDRA-16249) + * Make sure OOM errors are rethrown on truncation failure (CASSANDRA-16254) + * Send back client warnings when creating too many tables/keyspaces (CASSANDRA-16309) + * Add dedicated tcp user timeout for streaming connection (CASSANDRA-16143) + * Add generatetokens script for offline 
token allocation strategy generation (CASSANDRA-16205) + * Remove Windows scripts (CASSANDRA-16171) + * Improve checksumming and compression in protocol V5 (CASSANDRA-15299) + * Optimised repair streaming improvements (CASSANDRA-16274) + * Update jctools dependency to 3.1.0 (CASSANDRA-16255) + * 'SSLEngine closed already' exception on failed outbound connection (CASSANDRA-16277) + * Drain and/or shutdown might throw because of slow messaging service shutdown (CASSANDRA-16276) + * Upgrade JNA to 5.6.0, dropping support for <=glibc-2.6 systems (CASSANDRA-16212) + * Add saved Host IDs to TokenMetadata at startup (CASSANDRA-16246) + * Ensure that CacheMetrics.requests is picked up by the metric reporter (CASSANDRA-16228) + * Add a ratelimiter to snapshot creation and deletion (CASSANDRA-13019) + * Produce consistent tombstone for reads to avoid digest mismatch (CASSANDRA-15369) + * Fix SSTableloader issue when restoring a table named backups (CASSANDRA-16235) + * Invalid serialized size for responses caused by increasing message time by 1ms which caused extra bytes in size calculation (CASSANDRA-16103) + * Throw BufferOverflowException from DataOutputBuffer for better visibility (CASSANDRA-16214) + * TLS connections to the storage port on a node without server encryption configured causes java.io.IOException accessing missing keystore (CASSANDRA-16144) + * Internode messaging catches OOMs and does not rethrow (CASSANDRA-15214) + * When a table attempts to clean up metrics, it was cleaning up all global table metrics (CASSANDRA-16095) + * Bring back the accepted encryption protocols list as configurable option (CASSANDRA-13325) + * DigestResolver.getData throws AssertionError since dataResponse is null (CASSANDRA-16097) + * Cannot replace_address /X because it doesn't exist in gossip (CASSANDRA-16213) + * cqlsh row_id resets on page boundaries (CASSANDRA-16160) +Merged from 3.11: + * SASI's `max_compaction_flush_memory_in_mb` settings over 100GB revert to default of 
1GB (CASSANDRA-16071) +Merged from 3.0: * Extend the exclusion of replica filtering protection to other indices instead of just SASI (CASSANDRA-16311) * Synchronize transaction logs for JBOD (CASSANDRA-16225) * Fix the counting of cells per partition (CASSANDRA-16259) diff --cc src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java index b7ff1b6,21b77e3..217f4f1 --- a/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java +++ b/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java @@@ -937,27 -1043,20 +937,27 @@@ public class SinglePartitionReadComman boolean removeStatic = false; if (!columns.statics.isEmpty()) { - Row staticRow = searchIter.next(Clustering.STATIC_CLUSTERING); + Row staticRow = result.getRow(Clustering.STATIC_CLUSTERING); - removeStatic = staticRow != null && canRemoveRow(staticRow, columns.statics, sstableTimestamp); + removeStatic = staticRow != null && isRowComplete(staticRow, columns.statics, sstableTimestamp); } - NavigableSet<Clustering> toRemove = null; - for (Clustering clustering : clusterings) + NavigableSet<Clustering<?>> toRemove = null; + try (UnfilteredRowIterator iterator = result.unfilteredIterator(columnFilter(), clusterings, false)) { - Row row = searchIter.next(clustering); - if (row == null || !isRowComplete(row, columns.regulars, sstableTimestamp)) - continue; + while (iterator.hasNext()) + { + Unfiltered unfiltered = iterator.next(); + if (unfiltered == null || !unfiltered.isRow()) + continue; - if (toRemove == null) - toRemove = new TreeSet<>(result.metadata().comparator); - toRemove.add(clustering); + Row row = (Row) unfiltered; - if (!canRemoveRow(row, columns.regulars, sstableTimestamp)) ++ if (!isRowComplete(row, columns.regulars, sstableTimestamp)) + continue; + + if (toRemove == null) + toRemove = new TreeSet<>(result.metadata().comparator); + toRemove.add(row.clustering()); + } } if (!removeStatic && toRemove == null) @@@ -978,21 -1077,40 +978,40 @@@ return new 
ClusteringIndexNamesFilter(clusterings, filter.isReversed()); } - private boolean canRemoveRow(Row row, Columns requestedColumns, long sstableTimestamp) + /** + * We can stop reading row data from disk if what we've already read is more recent than the max timestamp + * of the next newest SSTable that might have data for the query. We care about 1.) the row timestamp (since + * every query cares if the row exists or not), 2.) the timestamps of the requested cells, and 3.) whether or + * not any of the cells we've read have actual data. + * + * @param row a potentially incomplete {@link Row} + * @param requestedColumns the columns requested by the query + * @param sstableTimestamp the max timestamp of the next newest SSTable to read + * + * @return true if the supplied {@link Row} is complete and its data more recent than the supplied timestamp + */ + private boolean isRowComplete(Row row, Columns requestedColumns, long sstableTimestamp) { - // We can remove a row if it has data that is more recent that the next sstable to consider for the data that the query - // cares about. And the data we care about is 1) the row timestamp (since every query cares if the row exists or not) - // and 2) the requested columns. - if (row.primaryKeyLivenessInfo().isEmpty() || row.primaryKeyLivenessInfo().timestamp() <= sstableTimestamp) + // Note that compact tables will always have an empty primary key liveness info. 
+ if (!row.primaryKeyLivenessInfo().isEmpty() && row.primaryKeyLivenessInfo().timestamp() <= sstableTimestamp) return false; + boolean hasLiveCell = false; + - for (ColumnDefinition column : requestedColumns) + for (ColumnMetadata column : requestedColumns) { - Cell cell = row.getCell(column); + Cell<?> cell = row.getCell(column); + if (cell == null || cell.timestamp() <= sstableTimestamp) return false; + + if (!cell.isTombstone()) + hasLiveCell = true; } - return true; + + // If we've gotten here w/ a compact table or at least one non-tombstone cell, the row is considered + // complete and we can avoid any further searching of older SSTables. - return hasLiveCell || !metadata().isCQLTable(); ++ return hasLiveCell || metadata().isCompactTable(); } @Override diff --cc test/unit/org/apache/cassandra/cql3/validation/miscellaneous/SSTablesIteratedTest.java index d363ecf,72d2909..2f53cbb --- a/test/unit/org/apache/cassandra/cql3/validation/miscellaneous/SSTablesIteratedTest.java +++ b/test/unit/org/apache/cassandra/cql3/validation/miscellaneous/SSTablesIteratedTest.java @@@ -554,4 -555,233 +555,235 @@@ public class SSTablesIteratedTest exten executeAndCheck(base + String.format("WHERE id=%d AND col < 40 LIMIT 1", i), 1, row(i, 30, "30")); } } + + @Test + public void testNonCompactTableRowDeletion() throws Throwable + { + createTable("CREATE TABLE %s (pk int, ck int, v text, PRIMARY KEY (pk, ck))"); + + execute("INSERT INTO %s (pk, ck, v) VALUES (1, 1, '1')"); + flush(); + + execute("DELETE FROM %s WHERE pk = 1 AND ck = 1"); + flush(); + + executeAndCheck("SELECT * FROM %s WHERE pk = 1 AND ck = 1", 2); + } + + @Test + public void testNonCompactTableRangeDeletion() throws Throwable + { + createTable("CREATE TABLE %s (a int, b int, c int, d int, PRIMARY KEY (a, b, c))"); + + execute("INSERT INTO %s (a, b, c, d) VALUES (?, ?, ?, ?)", 1, 1, 1, 1); + flush(); + + execute("DELETE FROM %s WHERE a=? 
AND b=?", 1, 1); + flush(); + + executeAndCheck("SELECT * FROM %s WHERE a=1 AND b=1 AND c=1", 2); + } + + @Test + public void testNonCompactTableCellsDeletion() throws Throwable + { + createTable("CREATE TABLE %s (pk int, ck int, v1 text, v2 text, PRIMARY KEY (pk, ck))"); + + execute("INSERT INTO %s (pk, ck, v1, v2) VALUES (1, 1, '1', '1')"); + flush(); + + execute("DELETE v1 FROM %s WHERE pk = 1 AND ck = 1"); + execute("DELETE v2 FROM %s WHERE pk = 1 AND ck = 1"); + flush(); + + executeAndCheck("SELECT * FROM %s WHERE pk = 1 AND ck = 1", 2, row(1, 1, null, null)); + } + + @Test + public void testCompactTableSkipping() throws Throwable + { + createTable("CREATE TABLE %s (pk int, ck int, v text, PRIMARY KEY (pk, ck)) WITH COMPACT STORAGE"); + + execute("INSERT INTO %s (pk, ck, v) VALUES (1, 1, '1') USING TIMESTAMP 1000000"); + execute("INSERT INTO %s (pk, ck, v) VALUES (1, 50, '2') USING TIMESTAMP 1000001"); + execute("INSERT INTO %s (pk, ck, v) VALUES (1, 100, '3') USING TIMESTAMP 1000002"); + flush(); + + execute("INSERT INTO %s (pk, ck, v) VALUES (1, 2, '4') USING TIMESTAMP 2000000"); + execute("INSERT INTO %s (pk, ck, v) VALUES (1, 51, '5') USING TIMESTAMP 2000001"); + execute("INSERT INTO %s (pk, ck, v) VALUES (1, 101, '6') USING TIMESTAMP 2000002"); + flush(); + + executeAndCheck("SELECT * FROM %s WHERE pk = 1 AND ck = 51", 1, row(1, 51, "5")); + + execute("ALTER TABLE %s DROP COMPACT STORAGE"); + executeAndCheck("SELECT * FROM %s WHERE pk = 1 AND ck = 51", 1, row(1, 51, "5")); + } + + @Test + public void testCompactTableSkippingPkOnly() throws Throwable + { + createTable("CREATE TABLE %s (pk int, ck int, PRIMARY KEY (pk, ck)) WITH COMPACT STORAGE"); + + execute("INSERT INTO %s (pk, ck) VALUES (1, 1) USING TIMESTAMP 1000000"); + execute("INSERT INTO %s (pk, ck) VALUES (1, 50) USING TIMESTAMP 1000001"); + execute("INSERT INTO %s (pk, ck) VALUES (1, 100) USING TIMESTAMP 1000002"); + flush(); + + execute("INSERT INTO %s (pk, ck) VALUES (1, 2) USING TIMESTAMP 
2000000"); + execute("INSERT INTO %s (pk, ck) VALUES (1, 51) USING TIMESTAMP 2000001"); + execute("INSERT INTO %s (pk, ck) VALUES (1, 101) USING TIMESTAMP 2000002"); + flush(); + + executeAndCheck("SELECT * FROM %s WHERE pk = 1 AND ck = 51", 1, row(1, 51)); + + execute("ALTER TABLE %s DROP COMPACT STORAGE"); - executeAndCheck("SELECT * FROM %s WHERE pk = 1 AND ck = 51", 1, row(1, 51)); ++ ++ // Dropping CS exposes a previously hidden/implicit field, so take that into account. ++ executeAndCheck("SELECT * FROM %s WHERE pk = 1 AND ck = 51", 1, row(1, 51, null)); + } + + @Test + public void testCompactTableCellDeletion() throws Throwable + { + createTable("CREATE TABLE %s (pk int, ck int, v text, PRIMARY KEY (pk, ck)) WITH COMPACT STORAGE"); + + execute("INSERT INTO %s (pk, ck, v) VALUES (1, 1, '1')"); + flush(); + + execute("DELETE v FROM %s WHERE pk = 1 AND ck = 1"); + flush(); + + executeAndCheck("SELECT * FROM %s WHERE pk = 1 AND ck = 1", 1); + + // Dropping compact storage forces us to hit an extra SSTable, since we can't rely on the isDense flag + // to determine that a row with a complete set of column deletes is complete. + execute("ALTER TABLE %s DROP COMPACT STORAGE"); + executeAndCheck("SELECT * FROM %s WHERE pk = 1 AND ck = 1", 2); + } + + @Test + public void testCompactTableRowDeletion() throws Throwable + { + createTable("CREATE TABLE %s (pk int, ck int, v text, PRIMARY KEY (pk, ck)) WITH COMPACT STORAGE"); + + execute("INSERT INTO %s (pk, ck, v) VALUES (1, 1, '1')"); + flush(); + + execute("DELETE FROM %s WHERE pk = 1 AND ck = 1"); + flush(); + + executeAndCheck("SELECT * FROM %s WHERE pk = 1 AND ck = 1", 1); + + // Dropping compact storage forces us to hit an extra SSTable, since we can't rely on the isDense flag + // to determine that a row with a complete set of column deletes is complete. 
+ execute("ALTER TABLE %s DROP COMPACT STORAGE"); + executeAndCheck("SELECT * FROM %s WHERE pk = 1 AND ck = 1", 2); + } + + @Test + public void testCompactTableRangeDeletion() throws Throwable + { + createTable("CREATE TABLE %s (a int, b int, c int, d int, PRIMARY KEY (a, b, c)) WITH COMPACT STORAGE"); + + execute("INSERT INTO %s (a, b, c, d) VALUES (?, ?, ?, ?)", 1, 1, 1, 1); + execute("INSERT INTO %s (a, b, c, d) VALUES (?, ?, ?, ?)", 1, 1, 2, 1); + execute("INSERT INTO %s (a, b, c, d) VALUES (?, ?, ?, ?)", 1, 2, 1, 1); + flush(); + + execute("DELETE FROM %s WHERE a=? AND b=?", 1, 1); + flush(); + + // Even with a compact table, we can't short-circuit for a range deletion rather than a cell tombstone. + executeAndCheck("SELECT * FROM %s WHERE a=1 AND b=1 AND c=1", 2); + + execute("ALTER TABLE %s DROP COMPACT STORAGE"); + executeAndCheck("SELECT * FROM %s WHERE a=1 AND b=1 AND c=1", 2); + } + + @Test + public void testCompactTableRangeOverRowDeletion() throws Throwable + { + createTable("CREATE TABLE %s (a int, b int, c int, d int, PRIMARY KEY (a, b, c)) WITH COMPACT STORAGE"); + + execute("INSERT INTO %s (a, b, c, d) VALUES (?, ?, ?, ?)", 1, 1, 1, 1); + execute("INSERT INTO %s (a, b, c, d) VALUES (?, ?, ?, ?)", 1, 1, 2, 1); + execute("INSERT INTO %s (a, b, c, d) VALUES (?, ?, ?, ?)", 1, 2, 1, 1); + flush(); + + execute("DELETE FROM %s WHERE a=? AND b=? AND c=?", 1, 1, 1); + flush(); + + execute("DELETE FROM %s WHERE a=? AND b=?", 1, 1); + flush(); + + // The range delete will subsume the row delete, and the latter will not factor into skipping decisions. 
+ executeAndCheck("SELECT * FROM %s WHERE a=1 AND b=1 AND c=1", 3); + + execute("ALTER TABLE %s DROP COMPACT STORAGE"); + executeAndCheck("SELECT * FROM %s WHERE a=1 AND b=1 AND c=1", 3); + } + + @Test + public void testCompactTableRowOverRangeDeletion() throws Throwable + { + createTable("CREATE TABLE %s (a int, b int, c int, d int, PRIMARY KEY (a, b, c)) WITH COMPACT STORAGE"); + + execute("INSERT INTO %s (a, b, c, d) VALUES (?, ?, ?, ?)", 1, 1, 1, 1); + execute("INSERT INTO %s (a, b, c, d) VALUES (?, ?, ?, ?)", 1, 1, 2, 1); + execute("INSERT INTO %s (a, b, c, d) VALUES (?, ?, ?, ?)", 1, 2, 1, 1); + flush(); + + execute("DELETE FROM %s WHERE a=? AND b=?", 1, 1); + flush(); + + execute("DELETE FROM %s WHERE a=? AND b=? AND c=?", 1, 1, 1); + flush(); + + // The row delete provides a tombstone, which is enough information to short-circuit after the first SSTable. + executeAndCheck("SELECT * FROM %s WHERE a=1 AND b=1 AND c=1", 1); + + execute("ALTER TABLE %s DROP COMPACT STORAGE"); + executeAndCheck("SELECT * FROM %s WHERE a=1 AND b=1 AND c=1", 3); + } + + @Test + public void testCompactTableCellUpdate() throws Throwable + { + createTable("CREATE TABLE %s (pk int, ck int, v text, PRIMARY KEY (pk, ck)) WITH COMPACT STORAGE"); + + execute("INSERT INTO %s (pk, ck, v) VALUES (1, 1, '1')"); + flush(); + + execute("UPDATE %s SET v = '2' WHERE pk = 1 AND ck = 1"); + flush(); + + executeAndCheck("SELECT * FROM %s WHERE pk = 1 AND ck = 1", 1, row(1, 1, "2")); + + execute("ALTER TABLE %s DROP COMPACT STORAGE"); + executeAndCheck("SELECT * FROM %s WHERE pk = 1 AND ck = 1", 1, row(1, 1, "2")); + } + + @Test + public void testCompactTableDeleteOverlappingSSTables() throws Throwable + { + createTable("CREATE TABLE %s (pk int, ck int, PRIMARY KEY (pk, ck)) WITH COMPACT STORAGE"); + + execute("INSERT INTO %s (pk, ck) VALUES (1, 51) USING TIMESTAMP 1000002"); + flush(); + execute("DELETE FROM %s WHERE pk = 1 AND ck = 51"); + flush(); + + execute("INSERT INTO %s (pk, ck) VALUES (1, 
51) USING TIMESTAMP 1000001"); + execute("INSERT INTO %s (pk, ck) VALUES (2, 51)"); + flush(); + + // If it weren't for the write to pk = 2, ck = 51, we could skip the third SSTable too and hit only one here. + executeAndCheck("SELECT * FROM %s WHERE pk = 1 AND ck = 51", 2); + + // Dropping compact storage forces us to hit an extra SSTable, since we can't rely on the isDense flag + // to determine that a row with a complete set of column deletes is complete. + execute("ALTER TABLE %s DROP COMPACT STORAGE"); + executeAndCheck("SELECT * FROM %s WHERE pk = 1 AND ck = 51", 3); + } } diff --cc test/unit/org/apache/cassandra/cql3/validation/operations/UpdateTest.java index 973531b,c9c8051..ee95e2d --- a/test/unit/org/apache/cassandra/cql3/validation/operations/UpdateTest.java +++ b/test/unit/org/apache/cassandra/cql3/validation/operations/UpdateTest.java @@@ -29,9 -28,11 +28,9 @@@ import org.apache.cassandra.cql3.Untype import org.apache.cassandra.cql3.UntypedResultSet.Row; import org.apache.cassandra.db.ColumnFamilyStore; import org.apache.cassandra.db.Keyspace; -import org.apache.cassandra.utils.ByteBufferUtil; --import static org.apache.commons.lang3.StringUtils.isEmpty; import static org.junit.Assert.assertTrue; + import static org.junit.Assert.assertEquals; public class UpdateTest extends CQLTester { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org For additional commands, e-mail: commits-h...@cassandra.apache.org